Browse code

Separate DWT from snow and dsputil

This moves the DWT functions from snow.c and dsputil.c to a file of
their own. A new struct, DWTContext, holds the function pointers
previously part of DSPContext.

Originally committed as revision 22522 to svn://svn.ffmpeg.org/ffmpeg/trunk

Måns Rullgård authored on 2010/03/15 02:50:12
Showing 12 changed files
... ...
@@ -894,6 +894,7 @@ CONFIG_LIST="
894 894
     bzlib
895 895
     dct
896 896
     doc
897
+    dwt
897 898
     dxva2
898 899
     fastdiv
899 900
     ffmpeg
... ...
@@ -1276,7 +1277,8 @@ rv30_decoder_select="golomb"
1276 1276
 rv40_decoder_select="golomb"
1277 1277
 shorten_decoder_select="golomb"
1278 1278
 sipr_decoder_select="lsp"
1279
-snow_encoder_select="aandct"
1279
+snow_decoder_select="dwt"
1280
+snow_encoder_select="aandct dwt"
1280 1281
 sonic_decoder_select="golomb"
1281 1282
 sonic_encoder_select="golomb"
1282 1283
 sonic_ls_encoder_select="golomb"
... ...
@@ -28,6 +28,7 @@ OBJS = allcodecs.o                                                      \
28 28
 OBJS-$(CONFIG_AANDCT)                  += aandcttab.o
29 29
 OBJS-$(CONFIG_ENCODERS)                += faandct.o jfdctfst.o jfdctint.o
30 30
 OBJS-$(CONFIG_DCT)                     += dct.o
31
+OBJS-$(CONFIG_DWT)                     += dwt.o
31 32
 OBJS-$(CONFIG_DXVA2)                   += dxva2.o
32 33
 FFT-OBJS-$(CONFIG_HARDCODED_TABLES)    += cos_tables.o
33 34
 OBJS-$(CONFIG_FFT)                     += avfft.o fft.o $(FFT-OBJS-yes)
... ...
@@ -598,7 +599,7 @@ MMX-OBJS-$(CONFIG_CAVS_DECODER)        += x86/cavsdsp_mmx.o
598 598
 MMX-OBJS-$(CONFIG_ENCODERS)            += x86/dsputilenc_mmx.o
599 599
 MMX-OBJS-$(CONFIG_GPL)                 += x86/idct_mmx.o
600 600
 MMX-OBJS-$(CONFIG_LPC)                 += x86/lpc_mmx.o
601
-MMX-OBJS-$(CONFIG_SNOW_DECODER)        += x86/snowdsp_mmx.o
601
+MMX-OBJS-$(CONFIG_DWT)                 += x86/snowdsp_mmx.o
602 602
 MMX-OBJS-$(CONFIG_VC1_DECODER)         += x86/vc1dsp_mmx.o
603 603
 MMX-OBJS-$(CONFIG_VP3_DECODER)         += x86/vp3dsp_mmx.o              \
604 604
                                           x86/vp3dsp_sse2.o
... ...
@@ -33,7 +33,6 @@
33 33
 #include "faandct.h"
34 34
 #include "faanidct.h"
35 35
 #include "mathops.h"
36
-#include "snow.h"
37 36
 #include "mpegvideo.h"
38 37
 #include "config.h"
39 38
 #include "lpc.h"
... ...
@@ -329,102 +328,6 @@ static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
329 329
     return s;
330 330
 }
331 331
 
332
-
333
-#if CONFIG_SNOW_ENCODER //dwt is in snow.c
334
-static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){
335
-    int s, i, j;
336
-    const int dec_count= w==8 ? 3 : 4;
337
-    int tmp[32*32];
338
-    int level, ori;
339
-    static const int scale[2][2][4][4]={
340
-      {
341
-        {
342
-            // 9/7 8x8 dec=3
343
-            {268, 239, 239, 213},
344
-            {  0, 224, 224, 152},
345
-            {  0, 135, 135, 110},
346
-        },{
347
-            // 9/7 16x16 or 32x32 dec=4
348
-            {344, 310, 310, 280},
349
-            {  0, 320, 320, 228},
350
-            {  0, 175, 175, 136},
351
-            {  0, 129, 129, 102},
352
-        }
353
-      },{
354
-        {
355
-            // 5/3 8x8 dec=3
356
-            {275, 245, 245, 218},
357
-            {  0, 230, 230, 156},
358
-            {  0, 138, 138, 113},
359
-        },{
360
-            // 5/3 16x16 or 32x32 dec=4
361
-            {352, 317, 317, 286},
362
-            {  0, 328, 328, 233},
363
-            {  0, 180, 180, 140},
364
-            {  0, 132, 132, 105},
365
-        }
366
-      }
367
-    };
368
-
369
-    for (i = 0; i < h; i++) {
370
-        for (j = 0; j < w; j+=4) {
371
-            tmp[32*i+j+0] = (pix1[j+0] - pix2[j+0])<<4;
372
-            tmp[32*i+j+1] = (pix1[j+1] - pix2[j+1])<<4;
373
-            tmp[32*i+j+2] = (pix1[j+2] - pix2[j+2])<<4;
374
-            tmp[32*i+j+3] = (pix1[j+3] - pix2[j+3])<<4;
375
-        }
376
-        pix1 += line_size;
377
-        pix2 += line_size;
378
-    }
379
-
380
-    ff_spatial_dwt(tmp, w, h, 32, type, dec_count);
381
-
382
-    s=0;
383
-    assert(w==h);
384
-    for(level=0; level<dec_count; level++){
385
-        for(ori= level ? 1 : 0; ori<4; ori++){
386
-            int size= w>>(dec_count-level);
387
-            int sx= (ori&1) ? size : 0;
388
-            int stride= 32<<(dec_count-level);
389
-            int sy= (ori&2) ? stride>>1 : 0;
390
-
391
-            for(i=0; i<size; i++){
392
-                for(j=0; j<size; j++){
393
-                    int v= tmp[sx + sy + i*stride + j] * scale[type][dec_count-3][level][ori];
394
-                    s += FFABS(v);
395
-                }
396
-            }
397
-        }
398
-    }
399
-    assert(s>=0);
400
-    return s>>9;
401
-}
402
-
403
-static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
404
-    return w_c(v, pix1, pix2, line_size,  8, h, 1);
405
-}
406
-
407
-static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
408
-    return w_c(v, pix1, pix2, line_size,  8, h, 0);
409
-}
410
-
411
-static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
412
-    return w_c(v, pix1, pix2, line_size, 16, h, 1);
413
-}
414
-
415
-static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
416
-    return w_c(v, pix1, pix2, line_size, 16, h, 0);
417
-}
418
-
419
-int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
420
-    return w_c(v, pix1, pix2, line_size, 32, h, 1);
421
-}
422
-
423
-int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
424
-    return w_c(v, pix1, pix2, line_size, 32, h, 0);
425
-}
426
-#endif
427
-
428 332
 /* draw the edges of width 'w' of an image of size width, height */
429 333
 //FIXME check that this is ok for mpeg4 interlaced
430 334
 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
... ...
@@ -3531,7 +3434,7 @@ void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
3531 3531
         case FF_CMP_NSSE:
3532 3532
             cmp[i]= c->nsse[i];
3533 3533
             break;
3534
-#if CONFIG_SNOW_ENCODER
3534
+#if CONFIG_DWT
3535 3535
         case FF_CMP_W53:
3536 3536
             cmp[i]= c->w53[i];
3537 3537
             break;
... ...
@@ -4816,11 +4719,8 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
4816 4816
     c->vsse[5]= vsse_intra8_c;
4817 4817
     c->nsse[0]= nsse16_c;
4818 4818
     c->nsse[1]= nsse8_c;
4819
-#if CONFIG_SNOW_ENCODER
4820
-    c->w53[0]= w53_16_c;
4821
-    c->w53[1]= w53_8_c;
4822
-    c->w97[0]= w97_16_c;
4823
-    c->w97[1]= w97_8_c;
4819
+#if CONFIG_DWT
4820
+    ff_dsputil_init_dwt(c);
4824 4821
 #endif
4825 4822
 
4826 4823
     c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
... ...
@@ -4865,12 +4765,6 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
4865 4865
     c->try_8x8basis= try_8x8basis_c;
4866 4866
     c->add_8x8basis= add_8x8basis_c;
4867 4867
 
4868
-#if CONFIG_SNOW_DECODER
4869
-    c->vertical_compose97i = ff_snow_vertical_compose97i;
4870
-    c->horizontal_compose97i = ff_snow_horizontal_compose97i;
4871
-    c->inner_add_yblock = ff_snow_inner_add_yblock;
4872
-#endif
4873
-
4874 4868
 #if CONFIG_VORBIS_DECODER
4875 4869
     c->vorbis_inverse_coupling = vorbis_inverse_coupling;
4876 4870
 #endif
... ...
@@ -37,8 +37,6 @@
37 37
 //#define DEBUG
38 38
 /* dct code */
39 39
 typedef short DCTELEM;
40
-typedef int DWTELEM;
41
-typedef short IDWTELEM;
42 40
 
43 41
 void fdct_ifast (DCTELEM *data);
44 42
 void fdct_ifast248 (DCTELEM *data);
... ...
@@ -185,10 +183,6 @@ static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
185 185
 // although currently h<4 is not used as functions with width <8 are neither used nor implemented
186 186
 typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size, int h)/* __attribute__ ((const))*/;
187 187
 
188
-
189
-// for snow slices
190
-typedef struct slice_buffer_s slice_buffer;
191
-
192 188
 /**
193 189
  * Scantable.
194 190
  */
... ...
@@ -538,11 +532,6 @@ typedef struct DSPContext {
538 538
     void (*h264_idct_add8)(uint8_t **dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
539 539
     void (*h264_idct_add16intra)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
540 540
 
541
-    /* snow wavelet */
542
-    void (*vertical_compose97i)(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);
543
-    void (*horizontal_compose97i)(IDWTELEM *b, int width);
544
-    void (*inner_add_yblock)(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
545
-
546 541
     void (*prefetch)(void *mem, int stride, int h);
547 542
 
548 543
     void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
... ...
@@ -681,6 +670,7 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx);
681 681
 void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx);
682 682
 void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx);
683 683
 
684
+void ff_dsputil_init_dwt(DSPContext *c);
684 685
 void ff_cavsdsp_init(DSPContext* c, AVCodecContext *avctx);
685 686
 void ff_rv30dsp_init(DSPContext* c, AVCodecContext *avctx);
686 687
 void ff_rv40dsp_init(DSPContext* c, AVCodecContext *avctx);
687 688
new file mode 100644
... ...
@@ -0,0 +1,843 @@
0
+/*
1
+ * Copyright (C) 2004-2010 Michael Niedermayer <michaelni@gmx.at>
2
+ *
3
+ * This file is part of FFmpeg.
4
+ *
5
+ * FFmpeg is free software; you can redistribute it and/or
6
+ * modify it under the terms of the GNU Lesser General Public
7
+ * License as published by the Free Software Foundation; either
8
+ * version 2.1 of the License, or (at your option) any later version.
9
+ *
10
+ * FFmpeg is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
+ * Lesser General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Lesser General Public
16
+ * License along with FFmpeg; if not, write to the Free Software
17
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ */
19
+
20
+#include "libavutil/attributes.h"
21
+#include "dsputil.h"
22
+#include "dwt.h"
23
+
24
+void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
25
+{
26
+    int i;
27
+
28
+    buf->base_buffer = base_buffer;
29
+    buf->line_count = line_count;
30
+    buf->line_width = line_width;
31
+    buf->data_count = max_allocated_lines;
32
+    buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
33
+    buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
34
+
35
+    for(i = 0; i < max_allocated_lines; i++){
36
+        buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
37
+    }
38
+
39
+    buf->data_stack_top = max_allocated_lines - 1;
40
+}
41
+
42
+IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
43
+{
44
+    IDWTELEM * buffer;
45
+
46
+    assert(buf->data_stack_top >= 0);
47
+//  assert(!buf->line[line]);
48
+    if (buf->line[line])
49
+        return buf->line[line];
50
+
51
+    buffer = buf->data_stack[buf->data_stack_top];
52
+    buf->data_stack_top--;
53
+    buf->line[line] = buffer;
54
+
55
+    return buffer;
56
+}
57
+
58
+void slice_buffer_release(slice_buffer * buf, int line)
59
+{
60
+    IDWTELEM * buffer;
61
+
62
+    assert(line >= 0 && line < buf->line_count);
63
+    assert(buf->line[line]);
64
+
65
+    buffer = buf->line[line];
66
+    buf->data_stack_top++;
67
+    buf->data_stack[buf->data_stack_top] = buffer;
68
+    buf->line[line] = NULL;
69
+}
70
+
71
+void slice_buffer_flush(slice_buffer * buf)
72
+{
73
+    int i;
74
+    for(i = 0; i < buf->line_count; i++){
75
+        if (buf->line[i])
76
+            slice_buffer_release(buf, i);
77
+    }
78
+}
79
+
80
+void slice_buffer_destroy(slice_buffer * buf)
81
+{
82
+    int i;
83
+    slice_buffer_flush(buf);
84
+
85
+    for(i = buf->data_count - 1; i >= 0; i--){
86
+        av_freep(&buf->data_stack[i]);
87
+    }
88
+    av_freep(&buf->data_stack);
89
+    av_freep(&buf->line);
90
+}
91
+
92
+static inline int mirror(int v, int m){
93
+    while((unsigned)v > (unsigned)m){
94
+        v=-v;
95
+        if(v<0) v+= 2*m;
96
+    }
97
+    return v;
98
+}
99
+
100
+static av_always_inline void
101
+lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
102
+     int dst_step, int src_step, int ref_step,
103
+     int width, int mul, int add, int shift,
104
+     int highpass, int inverse){
105
+    const int mirror_left= !highpass;
106
+    const int mirror_right= (width&1) ^ highpass;
107
+    const int w= (width>>1) - 1 + (highpass & width);
108
+    int i;
109
+
110
+#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
111
+    if(mirror_left){
112
+        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
113
+        dst += dst_step;
114
+        src += src_step;
115
+    }
116
+
117
+    for(i=0; i<w; i++){
118
+        dst[i*dst_step] =
119
+            LIFT(src[i*src_step],
120
+                 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
121
+                 inverse);
122
+    }
123
+
124
+    if(mirror_right){
125
+        dst[w*dst_step] =
126
+            LIFT(src[w*src_step],
127
+                 ((mul*2*ref[w*ref_step]+add)>>shift),
128
+                 inverse);
129
+    }
130
+}
131
+
132
+static av_always_inline void
133
+inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
134
+         int dst_step, int src_step, int ref_step,
135
+         int width, int mul, int add, int shift,
136
+         int highpass, int inverse){
137
+    const int mirror_left= !highpass;
138
+    const int mirror_right= (width&1) ^ highpass;
139
+    const int w= (width>>1) - 1 + (highpass & width);
140
+    int i;
141
+
142
+#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
143
+    if(mirror_left){
144
+        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
145
+        dst += dst_step;
146
+        src += src_step;
147
+    }
148
+
149
+    for(i=0; i<w; i++){
150
+        dst[i*dst_step] =
151
+            LIFT(src[i*src_step],
152
+                 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
153
+                 inverse);
154
+    }
155
+
156
+    if(mirror_right){
157
+        dst[w*dst_step] =
158
+            LIFT(src[w*src_step],
159
+                 ((mul*2*ref[w*ref_step]+add)>>shift),
160
+                 inverse);
161
+    }
162
+}
163
+
164
+#ifndef liftS
165
+static av_always_inline void
166
+liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
167
+      int dst_step, int src_step, int ref_step,
168
+      int width, int mul, int add, int shift,
169
+      int highpass, int inverse){
170
+    const int mirror_left= !highpass;
171
+    const int mirror_right= (width&1) ^ highpass;
172
+    const int w= (width>>1) - 1 + (highpass & width);
173
+    int i;
174
+
175
+    assert(shift == 4);
176
+#define LIFTS(src, ref, inv) \
177
+        ((inv) ? \
178
+            (src) + (((ref) + 4*(src))>>shift): \
179
+            -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
180
+    if(mirror_left){
181
+        dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
182
+        dst += dst_step;
183
+        src += src_step;
184
+    }
185
+
186
+    for(i=0; i<w; i++){
187
+        dst[i*dst_step] =
188
+            LIFTS(src[i*src_step],
189
+                  mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
190
+                  inverse);
191
+    }
192
+
193
+    if(mirror_right){
194
+        dst[w*dst_step] =
195
+            LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
196
+    }
197
+}
198
+static av_always_inline void
199
+inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
200
+          int dst_step, int src_step, int ref_step,
201
+          int width, int mul, int add, int shift,
202
+          int highpass, int inverse){
203
+    const int mirror_left= !highpass;
204
+    const int mirror_right= (width&1) ^ highpass;
205
+    const int w= (width>>1) - 1 + (highpass & width);
206
+    int i;
207
+
208
+    assert(shift == 4);
209
+#define LIFTS(src, ref, inv) \
210
+    ((inv) ? \
211
+        (src) + (((ref) + 4*(src))>>shift): \
212
+        -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
213
+    if(mirror_left){
214
+        dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
215
+        dst += dst_step;
216
+        src += src_step;
217
+    }
218
+
219
+    for(i=0; i<w; i++){
220
+        dst[i*dst_step] =
221
+            LIFTS(src[i*src_step],
222
+                  mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
223
+                  inverse);
224
+    }
225
+
226
+    if(mirror_right){
227
+        dst[w*dst_step] =
228
+            LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
229
+    }
230
+}
231
+#endif /* ! liftS */
232
+
233
+static void horizontal_decompose53i(DWTELEM *b, int width){
234
+    DWTELEM temp[width];
235
+    const int width2= width>>1;
236
+    int x;
237
+    const int w2= (width+1)>>1;
238
+
239
+    for(x=0; x<width2; x++){
240
+        temp[x   ]= b[2*x    ];
241
+        temp[x+w2]= b[2*x + 1];
242
+    }
243
+    if(width&1)
244
+        temp[x   ]= b[2*x    ];
245
+#if 0
246
+    {
247
+    int A1,A2,A3,A4;
248
+    A2= temp[1       ];
249
+    A4= temp[0       ];
250
+    A1= temp[0+width2];
251
+    A1 -= (A2 + A4)>>1;
252
+    A4 += (A1 + 1)>>1;
253
+    b[0+width2] = A1;
254
+    b[0       ] = A4;
255
+    for(x=1; x+1<width2; x+=2){
256
+        A3= temp[x+width2];
257
+        A4= temp[x+1     ];
258
+        A3 -= (A2 + A4)>>1;
259
+        A2 += (A1 + A3 + 2)>>2;
260
+        b[x+width2] = A3;
261
+        b[x       ] = A2;
262
+
263
+        A1= temp[x+1+width2];
264
+        A2= temp[x+2       ];
265
+        A1 -= (A2 + A4)>>1;
266
+        A4 += (A1 + A3 + 2)>>2;
267
+        b[x+1+width2] = A1;
268
+        b[x+1       ] = A4;
269
+    }
270
+    A3= temp[width-1];
271
+    A3 -= A2;
272
+    A2 += (A1 + A3 + 2)>>2;
273
+    b[width -1] = A3;
274
+    b[width2-1] = A2;
275
+    }
276
+#else
277
+    lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
278
+    lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
279
+#endif /* 0 */
280
+}
281
+
282
+static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
283
+    int i;
284
+
285
+    for(i=0; i<width; i++){
286
+        b1[i] -= (b0[i] + b2[i])>>1;
287
+    }
288
+}
289
+
290
+static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
291
+    int i;
292
+
293
+    for(i=0; i<width; i++){
294
+        b1[i] += (b0[i] + b2[i] + 2)>>2;
295
+    }
296
+}
297
+
298
+static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
299
+    int y;
300
+    DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
301
+    DWTELEM *b1= buffer + mirror(-2  , height-1)*stride;
302
+
303
+    for(y=-2; y<height; y+=2){
304
+        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
305
+        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
306
+
307
+        if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
308
+        if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
309
+
310
+        if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
311
+        if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
312
+
313
+        b0=b2;
314
+        b1=b3;
315
+    }
316
+}
317
+
318
+static void horizontal_decompose97i(DWTELEM *b, int width){
319
+    DWTELEM temp[width];
320
+    const int w2= (width+1)>>1;
321
+
322
+    lift (temp+w2, b    +1, b      , 1, 2, 2, width,  W_AM, W_AO, W_AS, 1, 1);
323
+    liftS(temp   , b      , temp+w2, 1, 2, 1, width,  W_BM, W_BO, W_BS, 0, 0);
324
+    lift (b   +w2, temp+w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 0);
325
+    lift (b      , temp   , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 0);
326
+}
327
+
328
+
329
+static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
330
+    int i;
331
+
332
+    for(i=0; i<width; i++){
333
+        b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
334
+    }
335
+}
336
+
337
+static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
338
+    int i;
339
+
340
+    for(i=0; i<width; i++){
341
+        b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
342
+    }
343
+}
344
+
345
+static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
346
+    int i;
347
+
348
+    for(i=0; i<width; i++){
349
+#ifdef liftS
350
+        b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
351
+#else
352
+        b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
353
+#endif
354
+    }
355
+}
356
+
357
+static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
358
+    int i;
359
+
360
+    for(i=0; i<width; i++){
361
+        b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
362
+    }
363
+}
364
+
365
+static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
366
+    int y;
367
+    DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
368
+    DWTELEM *b1= buffer + mirror(-4  , height-1)*stride;
369
+    DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
370
+    DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
371
+
372
+    for(y=-4; y<height; y+=2){
373
+        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
374
+        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
375
+
376
+        if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
377
+        if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
378
+
379
+        if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
380
+        if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
381
+        if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
382
+        if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
383
+
384
+        b0=b2;
385
+        b1=b3;
386
+        b2=b4;
387
+        b3=b5;
388
+    }
389
+}
390
+
391
+void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
392
+    int level;
393
+
394
+    for(level=0; level<decomposition_count; level++){
395
+        switch(type){
396
+        case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
397
+        case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
398
+        }
399
+    }
400
+}
401
+
402
+static void horizontal_compose53i(IDWTELEM *b, int width){
403
+    IDWTELEM temp[width];
404
+    const int width2= width>>1;
405
+    const int w2= (width+1)>>1;
406
+    int x;
407
+
408
+    for(x=0; x<width2; x++){
409
+        temp[2*x    ]= b[x   ];
410
+        temp[2*x + 1]= b[x+w2];
411
+    }
412
+    if(width&1)
413
+        temp[2*x    ]= b[x   ];
414
+
415
+    b[0] = temp[0] - ((temp[1]+1)>>1);
416
+    for(x=2; x<width-1; x+=2){
417
+        b[x  ] = temp[x  ] - ((temp[x-1] + temp[x+1]+2)>>2);
418
+        b[x-1] = temp[x-1] + ((b   [x-2] + b   [x  ]+1)>>1);
419
+    }
420
+    if(width&1){
421
+        b[x  ] = temp[x  ] - ((temp[x-1]+1)>>1);
422
+        b[x-1] = temp[x-1] + ((b   [x-2] + b  [x  ]+1)>>1);
423
+    }else
424
+        b[x-1] = temp[x-1] + b[x-2];
425
+}
426
+
427
+static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
428
+    int i;
429
+
430
+    for(i=0; i<width; i++){
431
+        b1[i] += (b0[i] + b2[i])>>1;
432
+    }
433
+}
434
+
435
+static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
436
+    int i;
437
+
438
+    for(i=0; i<width; i++){
439
+        b1[i] -= (b0[i] + b2[i] + 2)>>2;
440
+    }
441
+}
442
+
443
+static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
444
+    cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
445
+    cs->b1 = slice_buffer_get_line(sb, mirror(-1  , height-1) * stride_line);
446
+    cs->y = -1;
447
+}
448
+
449
+static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
450
+    cs->b0 = buffer + mirror(-1-1, height-1)*stride;
451
+    cs->b1 = buffer + mirror(-1  , height-1)*stride;
452
+    cs->y = -1;
453
+}
454
+
455
+static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
456
+    int y= cs->y;
457
+
458
+    IDWTELEM *b0= cs->b0;
459
+    IDWTELEM *b1= cs->b1;
460
+    IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
461
+    IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
462
+
463
+    if(y+1<(unsigned)height && y<(unsigned)height){
464
+        int x;
465
+
466
+        for(x=0; x<width; x++){
467
+            b2[x] -= (b1[x] + b3[x] + 2)>>2;
468
+            b1[x] += (b0[x] + b2[x])>>1;
469
+        }
470
+    }else{
471
+        if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
472
+        if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
473
+    }
474
+
475
+        if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
476
+        if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
477
+
478
+    cs->b0 = b2;
479
+    cs->b1 = b3;
480
+    cs->y += 2;
481
+}
482
+
483
+static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
484
+    int y= cs->y;
485
+    IDWTELEM *b0= cs->b0;
486
+    IDWTELEM *b1= cs->b1;
487
+    IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
488
+    IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
489
+
490
+        if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
491
+        if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
492
+
493
+        if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
494
+        if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
495
+
496
+    cs->b0 = b2;
497
+    cs->b1 = b3;
498
+    cs->y += 2;
499
+}
500
+
501
+static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
502
+    DWTCompose cs;
503
+    spatial_compose53i_init(&cs, buffer, height, stride);
504
+    while(cs.y <= height)
505
+        spatial_compose53i_dy(&cs, buffer, width, height, stride);
506
+}
507
+
508
+
509
+void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
510
+    IDWTELEM temp[width];
511
+    const int w2= (width+1)>>1;
512
+
513
+#if 0 //maybe more understandable but slower
514
+    inv_lift (temp   , b      , b   +w2, 2, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
515
+    inv_lift (temp+1 , b   +w2, temp   , 2, 1, 2, width,  W_CM, W_CO, W_CS, 1, 1);
516
+
517
+    inv_liftS(b      , temp   , temp+1 , 2, 2, 2, width,  W_BM, W_BO, W_BS, 0, 1);
518
+    inv_lift (b+1    , temp+1 , b      , 2, 2, 2, width,  W_AM, W_AO, W_AS, 1, 0);
519
+#else
520
+    int x;
521
+    temp[0] = b[0] - ((3*b[w2]+2)>>2);
522
+    for(x=1; x<(width>>1); x++){
523
+        temp[2*x  ] = b[x     ] - ((3*(b   [x+w2-1] + b[x+w2])+4)>>3);
524
+        temp[2*x-1] = b[x+w2-1] - temp[2*x-2] - temp[2*x];
525
+    }
526
+    if(width&1){
527
+        temp[2*x  ] = b[x     ] - ((3*b   [x+w2-1]+2)>>2);
528
+        temp[2*x-1] = b[x+w2-1] - temp[2*x-2] - temp[2*x];
529
+    }else
530
+        temp[2*x-1] = b[x+w2-1] - 2*temp[2*x-2];
531
+
532
+    b[0] = temp[0] + ((2*temp[0] + temp[1]+4)>>3);
533
+    for(x=2; x<width-1; x+=2){
534
+        b[x  ] = temp[x  ] + ((4*temp[x  ] + temp[x-1] + temp[x+1]+8)>>4);
535
+        b[x-1] = temp[x-1] + ((3*(b  [x-2] + b   [x  ] ))>>1);
536
+    }
537
+    if(width&1){
538
+        b[x  ] = temp[x  ] + ((2*temp[x  ] + temp[x-1]+4)>>3);
539
+        b[x-1] = temp[x-1] + ((3*(b  [x-2] + b   [x  ] ))>>1);
540
+    }else
541
+        b[x-1] = temp[x-1] + 3*b [x-2];
542
+#endif
543
+}
544
+
545
+static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
546
+    int i;
547
+
548
+    for(i=0; i<width; i++){
549
+        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
550
+    }
551
+}
552
+
553
+static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
554
+    int i;
555
+
556
+    for(i=0; i<width; i++){
557
+        b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
558
+    }
559
+}
560
+
561
+static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
562
+    int i;
563
+
564
+    for(i=0; i<width; i++){
565
+#ifdef liftS
566
+        b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
567
+#else
568
+        b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
569
+#endif
570
+    }
571
+}
572
+
573
+static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
574
+    int i;
575
+
576
+    for(i=0; i<width; i++){
577
+        b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
578
+    }
579
+}
580
+
581
+void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
582
+    int i;
583
+
584
+    for(i=0; i<width; i++){
585
+        b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
586
+        b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
587
+#ifdef liftS
588
+        b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
589
+#else
590
+        b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
591
+#endif
592
+        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
593
+    }
594
+}
595
+
596
+static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
597
+    cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
598
+    cs->b1 = slice_buffer_get_line(sb, mirror(-3  , height-1) * stride_line);
599
+    cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
600
+    cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
601
+    cs->y = -3;
602
+}
603
+
604
+static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
605
+    cs->b0 = buffer + mirror(-3-1, height-1)*stride;
606
+    cs->b1 = buffer + mirror(-3  , height-1)*stride;
607
+    cs->b2 = buffer + mirror(-3+1, height-1)*stride;
608
+    cs->b3 = buffer + mirror(-3+2, height-1)*stride;
609
+    cs->y = -3;
610
+}
611
+
612
+static void spatial_compose97i_dy_buffered(DWTContext *dsp, DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
613
+    int y = cs->y;
614
+
615
+    IDWTELEM *b0= cs->b0;
616
+    IDWTELEM *b1= cs->b1;
617
+    IDWTELEM *b2= cs->b2;
618
+    IDWTELEM *b3= cs->b3;
619
+    IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
620
+    IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
621
+
622
+    if(y>0 && y+4<height){
623
+        dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
624
+    }else{
625
+        if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
626
+        if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
627
+        if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
628
+        if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
629
+    }
630
+
631
+    if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
632
+    if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
633
+
634
+    cs->b0=b2;
635
+    cs->b1=b3;
636
+    cs->b2=b4;
637
+    cs->b3=b5;
638
+    cs->y += 2;
639
+}
640
+
641
+static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
642
+    int y = cs->y;
643
+    IDWTELEM *b0= cs->b0;
644
+    IDWTELEM *b1= cs->b1;
645
+    IDWTELEM *b2= cs->b2;
646
+    IDWTELEM *b3= cs->b3;
647
+    IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
648
+    IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
649
+
650
+    if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
651
+    if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
652
+    if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
653
+    if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
654
+
655
+    if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
656
+    if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
657
+
658
+    cs->b0=b2;
659
+    cs->b1=b3;
660
+    cs->b2=b4;
661
+    cs->b3=b5;
662
+    cs->y += 2;
663
+}
664
+
665
+static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
666
+    DWTCompose cs;
667
+    spatial_compose97i_init(&cs, buffer, height, stride);
668
+    while(cs.y <= height)
669
+        spatial_compose97i_dy(&cs, buffer, width, height, stride);
670
+}
671
+
672
+void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
673
+    int level;
674
+    for(level=decomposition_count-1; level>=0; level--){
675
+        switch(type){
676
+        case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
677
+        case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
678
+        }
679
+    }
680
+}
681
+
682
+void ff_spatial_idwt_buffered_slice(DWTContext *dsp, DWTCompose *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
683
+    const int support = type==1 ? 3 : 5;
684
+    int level;
685
+    if(type==2) return;
686
+
687
+    for(level=decomposition_count-1; level>=0; level--){
688
+        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
689
+            switch(type){
690
+            case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
691
+                break;
692
+            case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
693
+                break;
694
+            }
695
+        }
696
+    }
697
+}
698
+
699
+void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
700
+    int level;
701
+    for(level=decomposition_count-1; level>=0; level--){
702
+        switch(type){
703
+        case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
704
+        case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
705
+        }
706
+    }
707
+}
708
+
709
+void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
710
+    const int support = type==1 ? 3 : 5;
711
+    int level;
712
+    if(type==2) return;
713
+
714
+    for(level=decomposition_count-1; level>=0; level--){
715
+        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
716
+            switch(type){
717
+            case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
718
+                break;
719
+            case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
720
+                break;
721
+            }
722
+        }
723
+    }
724
+}
725
+
726
+void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
727
+        DWTCompose cs[MAX_DECOMPOSITIONS];
728
+        int y;
729
+        ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
730
+        for(y=0; y<height; y+=4)
731
+            ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
732
+}
733
+
734
+static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){
735
+    int s, i, j;
736
+    const int dec_count= w==8 ? 3 : 4;
737
+    int tmp[32*32];
738
+    int level, ori;
739
+    static const int scale[2][2][4][4]={
740
+      {
741
+        {
742
+            // 9/7 8x8 dec=3
743
+            {268, 239, 239, 213},
744
+            {  0, 224, 224, 152},
745
+            {  0, 135, 135, 110},
746
+        },{
747
+            // 9/7 16x16 or 32x32 dec=4
748
+            {344, 310, 310, 280},
749
+            {  0, 320, 320, 228},
750
+            {  0, 175, 175, 136},
751
+            {  0, 129, 129, 102},
752
+        }
753
+      },{
754
+        {
755
+            // 5/3 8x8 dec=3
756
+            {275, 245, 245, 218},
757
+            {  0, 230, 230, 156},
758
+            {  0, 138, 138, 113},
759
+        },{
760
+            // 5/3 16x16 or 32x32 dec=4
761
+            {352, 317, 317, 286},
762
+            {  0, 328, 328, 233},
763
+            {  0, 180, 180, 140},
764
+            {  0, 132, 132, 105},
765
+        }
766
+      }
767
+    };
768
+
769
+    for (i = 0; i < h; i++) {
770
+        for (j = 0; j < w; j+=4) {
771
+            tmp[32*i+j+0] = (pix1[j+0] - pix2[j+0])<<4;
772
+            tmp[32*i+j+1] = (pix1[j+1] - pix2[j+1])<<4;
773
+            tmp[32*i+j+2] = (pix1[j+2] - pix2[j+2])<<4;
774
+            tmp[32*i+j+3] = (pix1[j+3] - pix2[j+3])<<4;
775
+        }
776
+        pix1 += line_size;
777
+        pix2 += line_size;
778
+    }
779
+
780
+    ff_spatial_dwt(tmp, w, h, 32, type, dec_count);
781
+
782
+    s=0;
783
+    assert(w==h);
784
+    for(level=0; level<dec_count; level++){
785
+        for(ori= level ? 1 : 0; ori<4; ori++){
786
+            int size= w>>(dec_count-level);
787
+            int sx= (ori&1) ? size : 0;
788
+            int stride= 32<<(dec_count-level);
789
+            int sy= (ori&2) ? stride>>1 : 0;
790
+
791
+            for(i=0; i<size; i++){
792
+                for(j=0; j<size; j++){
793
+                    int v= tmp[sx + sy + i*stride + j] * scale[type][dec_count-3][level][ori];
794
+                    s += FFABS(v);
795
+                }
796
+            }
797
+        }
798
+    }
799
+    assert(s>=0);
800
+    return s>>9;
801
+}
802
+
803
+static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
804
+    return w_c(v, pix1, pix2, line_size,  8, h, 1);
805
+}
806
+
807
+static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
808
+    return w_c(v, pix1, pix2, line_size,  8, h, 0);
809
+}
810
+
811
+static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
812
+    return w_c(v, pix1, pix2, line_size, 16, h, 1);
813
+}
814
+
815
+static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
816
+    return w_c(v, pix1, pix2, line_size, 16, h, 0);
817
+}
818
+
819
+int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
820
+    return w_c(v, pix1, pix2, line_size, 32, h, 1);
821
+}
822
+
823
+int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
824
+    return w_c(v, pix1, pix2, line_size, 32, h, 0);
825
+}
826
+
827
+void ff_dsputil_init_dwt(DSPContext *c)
828
+{
829
+    c->w53[0]= w53_16_c;
830
+    c->w53[1]= w53_8_c;
831
+    c->w97[0]= w97_16_c;
832
+    c->w97[1]= w97_8_c;
833
+}
834
+
835
+void ff_dwt_init(DWTContext *c)
836
+{
837
+    c->vertical_compose97i = ff_snow_vertical_compose97i;
838
+    c->horizontal_compose97i = ff_snow_horizontal_compose97i;
839
+    c->inner_add_yblock = ff_snow_inner_add_yblock;
840
+
841
+    if (ARCH_X86) ff_dwt_init_x86(c);
842
+}
0 843
new file mode 100644
... ...
@@ -0,0 +1,156 @@
0
+/*
1
+ * Copyright (C) 2004-2010 Michael Niedermayer <michaelni@gmx.at>
2
+ *
3
+ * This file is part of FFmpeg.
4
+ *
5
+ * FFmpeg is free software; you can redistribute it and/or
6
+ * modify it under the terms of the GNU Lesser General Public
7
+ * License as published by the Free Software Foundation; either
8
+ * version 2.1 of the License, or (at your option) any later version.
9
+ *
10
+ * FFmpeg is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
+ * Lesser General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Lesser General Public
16
+ * License along with FFmpeg; if not, write to the Free Software
17
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ */
19
+
20
+#ifndef AVCODEC_DWT_H
21
+#define AVCODEC_DWT_H
22
+
23
+#include <stdint.h>
24
+
25
+typedef int DWTELEM;
26
+typedef short IDWTELEM;
27
+
28
+typedef struct {
29
+    IDWTELEM *b0;
30
+    IDWTELEM *b1;
31
+    IDWTELEM *b2;
32
+    IDWTELEM *b3;
33
+    int y;
34
+} DWTCompose;
35
+
36
+/** Used to minimize the amount of memory used in order to optimize cache performance. **/
37
+typedef struct slice_buffer_s {
38
+    IDWTELEM * * line; ///< For use by idwt and predict_slices.
39
+    IDWTELEM * * data_stack; ///< Used for internal purposes.
40
+    int data_stack_top;
41
+    int line_count;
42
+    int line_width;
43
+    int data_count;
44
+    IDWTELEM * base_buffer; ///< Buffer that this structure is caching.
45
+} slice_buffer;
46
+
47
+typedef struct DWTContext {
48
+    void (*vertical_compose97i)(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);
49
+    void (*horizontal_compose97i)(IDWTELEM *b, int width);
50
+    void (*inner_add_yblock)(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
51
+} DWTContext;
52
+
53
+#define MAX_DECOMPOSITIONS 8
54
+
55
+#define DWT_97 0
56
+#define DWT_53 1
57
+
58
+#define liftS lift
59
+#if 1
60
+#define W_AM 3
61
+#define W_AO 0
62
+#define W_AS 1
63
+
64
+#undef liftS
65
+#define W_BM 1
66
+#define W_BO 8
67
+#define W_BS 4
68
+
69
+#define W_CM 1
70
+#define W_CO 0
71
+#define W_CS 0
72
+
73
+#define W_DM 3
74
+#define W_DO 4
75
+#define W_DS 3
76
+#elif 0
77
+#define W_AM 55
78
+#define W_AO 16
79
+#define W_AS 5
80
+
81
+#define W_BM 3
82
+#define W_BO 32
83
+#define W_BS 6
84
+
85
+#define W_CM 127
86
+#define W_CO 64
87
+#define W_CS 7
88
+
89
+#define W_DM 7
90
+#define W_DO 8
91
+#define W_DS 4
92
+#elif 0
93
+#define W_AM 97
94
+#define W_AO 32
95
+#define W_AS 6
96
+
97
+#define W_BM 63
98
+#define W_BO 512
99
+#define W_BS 10
100
+
101
+#define W_CM 13
102
+#define W_CO 8
103
+#define W_CS 4
104
+
105
+#define W_DM 15
106
+#define W_DO 16
107
+#define W_DS 5
108
+
109
+#else
110
+
111
+#define W_AM 203
112
+#define W_AO 64
113
+#define W_AS 7
114
+
115
+#define W_BM 217
116
+#define W_BO 2048
117
+#define W_BS 12
118
+
119
+#define W_CM 113
120
+#define W_CO 64
121
+#define W_CS 7
122
+
123
+#define W_DM 227
124
+#define W_DO 128
125
+#define W_DS 9
126
+#endif
127
+
128
+#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
129
+//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
130
+
131
+void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer);
132
+void slice_buffer_release(slice_buffer * buf, int line);
133
+void slice_buffer_flush(slice_buffer * buf);
134
+void slice_buffer_destroy(slice_buffer * buf);
135
+IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line);
136
+
137
+void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);
138
+void ff_snow_horizontal_compose97i(IDWTELEM *b, int width);
139
+void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
140
+
141
+int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h);
142
+int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h);
143
+
144
+void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count);
145
+
146
+void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count);
147
+void ff_spatial_idwt_buffered_slice(DWTContext *dsp, DWTCompose *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y);
148
+void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count);
149
+void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y);
150
+void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count);
151
+
152
+void ff_dwt_init(DWTContext *c);
153
+void ff_dwt_init_x86(DWTContext *c);
154
+
155
+#endif /* AVCODEC_DWT_H */
... ...
@@ -28,6 +28,7 @@
28 28
 
29 29
 #include "avcodec.h"
30 30
 #include "dsputil.h"
31
+#include "dwt.h"
31 32
 #include "ivi_common.h"
32 33
 #include "ivi_dsp.h"
33 34
 
... ...
@@ -21,6 +21,7 @@
21 21
 #include "libavutil/intmath.h"
22 22
 #include "avcodec.h"
23 23
 #include "dsputil.h"
24
+#include "dwt.h"
24 25
 #include "snow.h"
25 26
 
26 27
 #include "rangecoder.h"
... ...
@@ -440,6 +441,7 @@ typedef struct SnowContext{
440 440
     AVCodecContext *avctx;
441 441
     RangeCoder c;
442 442
     DSPContext dsp;
443
+    DWTContext dwt;
443 444
     AVFrame new_picture;
444 445
     AVFrame input_picture;              ///< new_picture with the internal linesizes
445 446
     AVFrame current_picture;
... ...
@@ -494,85 +496,6 @@ typedef struct SnowContext{
494 494
     uint8_t *scratchbuf;
495 495
 }SnowContext;
496 496
 
497
-typedef struct {
498
-    IDWTELEM *b0;
499
-    IDWTELEM *b1;
500
-    IDWTELEM *b2;
501
-    IDWTELEM *b3;
502
-    int y;
503
-} DWTCompose;
504
-
505
-#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
506
-//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
507
-
508
-static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
509
-{
510
-    int i;
511
-
512
-    buf->base_buffer = base_buffer;
513
-    buf->line_count = line_count;
514
-    buf->line_width = line_width;
515
-    buf->data_count = max_allocated_lines;
516
-    buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
517
-    buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
518
-
519
-    for(i = 0; i < max_allocated_lines; i++){
520
-        buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
521
-    }
522
-
523
-    buf->data_stack_top = max_allocated_lines - 1;
524
-}
525
-
526
-static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
527
-{
528
-    IDWTELEM * buffer;
529
-
530
-    assert(buf->data_stack_top >= 0);
531
-//  assert(!buf->line[line]);
532
-    if (buf->line[line])
533
-        return buf->line[line];
534
-
535
-    buffer = buf->data_stack[buf->data_stack_top];
536
-    buf->data_stack_top--;
537
-    buf->line[line] = buffer;
538
-
539
-    return buffer;
540
-}
541
-
542
-static void slice_buffer_release(slice_buffer * buf, int line)
543
-{
544
-    IDWTELEM * buffer;
545
-
546
-    assert(line >= 0 && line < buf->line_count);
547
-    assert(buf->line[line]);
548
-
549
-    buffer = buf->line[line];
550
-    buf->data_stack_top++;
551
-    buf->data_stack[buf->data_stack_top] = buffer;
552
-    buf->line[line] = NULL;
553
-}
554
-
555
-static void slice_buffer_flush(slice_buffer * buf)
556
-{
557
-    int i;
558
-    for(i = 0; i < buf->line_count; i++){
559
-        if (buf->line[i])
560
-            slice_buffer_release(buf, i);
561
-    }
562
-}
563
-
564
-static void slice_buffer_destroy(slice_buffer * buf)
565
-{
566
-    int i;
567
-    slice_buffer_flush(buf);
568
-
569
-    for(i = buf->data_count - 1; i >= 0; i--){
570
-        av_freep(&buf->data_stack[i]);
571
-    }
572
-    av_freep(&buf->data_stack);
573
-    av_freep(&buf->line);
574
-}
575
-
576 497
 #ifdef __sgi
577 498
 // Avoid a name clash on SGI IRIX
578 499
 #undef qexp
... ...
@@ -580,14 +503,6 @@ static void slice_buffer_destroy(slice_buffer * buf)
580 580
 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
581 581
 static uint8_t qexp[QROOT];
582 582
 
583
-static inline int mirror(int v, int m){
584
-    while((unsigned)v > (unsigned)m){
585
-        v=-v;
586
-        if(v<0) v+= 2*m;
587
-    }
588
-    return v;
589
-}
590
-
591 583
 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
592 584
     int i;
593 585
 
... ...
@@ -709,605 +624,6 @@ static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
709 709
     return v;
710 710
 }
711 711
 
712
-static av_always_inline void
713
-lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
714
-     int dst_step, int src_step, int ref_step,
715
-     int width, int mul, int add, int shift,
716
-     int highpass, int inverse){
717
-    const int mirror_left= !highpass;
718
-    const int mirror_right= (width&1) ^ highpass;
719
-    const int w= (width>>1) - 1 + (highpass & width);
720
-    int i;
721
-
722
-#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
723
-    if(mirror_left){
724
-        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
725
-        dst += dst_step;
726
-        src += src_step;
727
-    }
728
-
729
-    for(i=0; i<w; i++){
730
-        dst[i*dst_step] =
731
-            LIFT(src[i*src_step],
732
-                 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
733
-                 inverse);
734
-    }
735
-
736
-    if(mirror_right){
737
-        dst[w*dst_step] =
738
-            LIFT(src[w*src_step],
739
-                 ((mul*2*ref[w*ref_step]+add)>>shift),
740
-                 inverse);
741
-    }
742
-}
743
-
744
-static av_always_inline void
745
-inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
746
-         int dst_step, int src_step, int ref_step,
747
-         int width, int mul, int add, int shift,
748
-         int highpass, int inverse){
749
-    const int mirror_left= !highpass;
750
-    const int mirror_right= (width&1) ^ highpass;
751
-    const int w= (width>>1) - 1 + (highpass & width);
752
-    int i;
753
-
754
-#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
755
-    if(mirror_left){
756
-        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
757
-        dst += dst_step;
758
-        src += src_step;
759
-    }
760
-
761
-    for(i=0; i<w; i++){
762
-        dst[i*dst_step] =
763
-            LIFT(src[i*src_step],
764
-                 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
765
-                 inverse);
766
-    }
767
-
768
-    if(mirror_right){
769
-        dst[w*dst_step] =
770
-            LIFT(src[w*src_step],
771
-                 ((mul*2*ref[w*ref_step]+add)>>shift),
772
-                 inverse);
773
-    }
774
-}
775
-
776
-#ifndef liftS
777
-static av_always_inline void
778
-liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
779
-      int dst_step, int src_step, int ref_step,
780
-      int width, int mul, int add, int shift,
781
-      int highpass, int inverse){
782
-    const int mirror_left= !highpass;
783
-    const int mirror_right= (width&1) ^ highpass;
784
-    const int w= (width>>1) - 1 + (highpass & width);
785
-    int i;
786
-
787
-    assert(shift == 4);
788
-#define LIFTS(src, ref, inv) \
789
-        ((inv) ? \
790
-            (src) + (((ref) + 4*(src))>>shift): \
791
-            -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
792
-    if(mirror_left){
793
-        dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
794
-        dst += dst_step;
795
-        src += src_step;
796
-    }
797
-
798
-    for(i=0; i<w; i++){
799
-        dst[i*dst_step] =
800
-            LIFTS(src[i*src_step],
801
-                  mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
802
-                  inverse);
803
-    }
804
-
805
-    if(mirror_right){
806
-        dst[w*dst_step] =
807
-            LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
808
-    }
809
-}
810
-static av_always_inline void
811
-inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
812
-          int dst_step, int src_step, int ref_step,
813
-          int width, int mul, int add, int shift,
814
-          int highpass, int inverse){
815
-    const int mirror_left= !highpass;
816
-    const int mirror_right= (width&1) ^ highpass;
817
-    const int w= (width>>1) - 1 + (highpass & width);
818
-    int i;
819
-
820
-    assert(shift == 4);
821
-#define LIFTS(src, ref, inv) \
822
-    ((inv) ? \
823
-        (src) + (((ref) + 4*(src))>>shift): \
824
-        -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
825
-    if(mirror_left){
826
-        dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
827
-        dst += dst_step;
828
-        src += src_step;
829
-    }
830
-
831
-    for(i=0; i<w; i++){
832
-        dst[i*dst_step] =
833
-            LIFTS(src[i*src_step],
834
-                  mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
835
-                  inverse);
836
-    }
837
-
838
-    if(mirror_right){
839
-        dst[w*dst_step] =
840
-            LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
841
-    }
842
-}
843
-#endif /* ! liftS */
844
-
845
-static void horizontal_decompose53i(DWTELEM *b, int width){
846
-    DWTELEM temp[width];
847
-    const int width2= width>>1;
848
-    int x;
849
-    const int w2= (width+1)>>1;
850
-
851
-    for(x=0; x<width2; x++){
852
-        temp[x   ]= b[2*x    ];
853
-        temp[x+w2]= b[2*x + 1];
854
-    }
855
-    if(width&1)
856
-        temp[x   ]= b[2*x    ];
857
-#if 0
858
-    {
859
-    int A1,A2,A3,A4;
860
-    A2= temp[1       ];
861
-    A4= temp[0       ];
862
-    A1= temp[0+width2];
863
-    A1 -= (A2 + A4)>>1;
864
-    A4 += (A1 + 1)>>1;
865
-    b[0+width2] = A1;
866
-    b[0       ] = A4;
867
-    for(x=1; x+1<width2; x+=2){
868
-        A3= temp[x+width2];
869
-        A4= temp[x+1     ];
870
-        A3 -= (A2 + A4)>>1;
871
-        A2 += (A1 + A3 + 2)>>2;
872
-        b[x+width2] = A3;
873
-        b[x       ] = A2;
874
-
875
-        A1= temp[x+1+width2];
876
-        A2= temp[x+2       ];
877
-        A1 -= (A2 + A4)>>1;
878
-        A4 += (A1 + A3 + 2)>>2;
879
-        b[x+1+width2] = A1;
880
-        b[x+1       ] = A4;
881
-    }
882
-    A3= temp[width-1];
883
-    A3 -= A2;
884
-    A2 += (A1 + A3 + 2)>>2;
885
-    b[width -1] = A3;
886
-    b[width2-1] = A2;
887
-    }
888
-#else
889
-    lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
890
-    lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
891
-#endif /* 0 */
892
-}
893
-
894
-static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
895
-    int i;
896
-
897
-    for(i=0; i<width; i++){
898
-        b1[i] -= (b0[i] + b2[i])>>1;
899
-    }
900
-}
901
-
902
-static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
903
-    int i;
904
-
905
-    for(i=0; i<width; i++){
906
-        b1[i] += (b0[i] + b2[i] + 2)>>2;
907
-    }
908
-}
909
-
910
-static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
911
-    int y;
912
-    DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
913
-    DWTELEM *b1= buffer + mirror(-2  , height-1)*stride;
914
-
915
-    for(y=-2; y<height; y+=2){
916
-        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
917
-        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
918
-
919
-        if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
920
-        if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
921
-
922
-        if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
923
-        if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
924
-
925
-        b0=b2;
926
-        b1=b3;
927
-    }
928
-}
929
-
930
-static void horizontal_decompose97i(DWTELEM *b, int width){
931
-    DWTELEM temp[width];
932
-    const int w2= (width+1)>>1;
933
-
934
-    lift (temp+w2, b    +1, b      , 1, 2, 2, width,  W_AM, W_AO, W_AS, 1, 1);
935
-    liftS(temp   , b      , temp+w2, 1, 2, 1, width,  W_BM, W_BO, W_BS, 0, 0);
936
-    lift (b   +w2, temp+w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 0);
937
-    lift (b      , temp   , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 0);
938
-}
939
-
940
-
941
-static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
942
-    int i;
943
-
944
-    for(i=0; i<width; i++){
945
-        b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
946
-    }
947
-}
948
-
949
-static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
950
-    int i;
951
-
952
-    for(i=0; i<width; i++){
953
-        b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
954
-    }
955
-}
956
-
957
-static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
958
-    int i;
959
-
960
-    for(i=0; i<width; i++){
961
-#ifdef liftS
962
-        b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
963
-#else
964
-        b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
965
-#endif
966
-    }
967
-}
968
-
969
-static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
970
-    int i;
971
-
972
-    for(i=0; i<width; i++){
973
-        b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
974
-    }
975
-}
976
-
977
-static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
978
-    int y;
979
-    DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
980
-    DWTELEM *b1= buffer + mirror(-4  , height-1)*stride;
981
-    DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
982
-    DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
983
-
984
-    for(y=-4; y<height; y+=2){
985
-        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
986
-        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
987
-
988
-        if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
989
-        if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
990
-
991
-        if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
992
-        if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
993
-        if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
994
-        if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
995
-
996
-        b0=b2;
997
-        b1=b3;
998
-        b2=b4;
999
-        b3=b5;
1000
-    }
1001
-}
1002
-
1003
-void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1004
-    int level;
1005
-
1006
-    for(level=0; level<decomposition_count; level++){
1007
-        switch(type){
1008
-        case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1009
-        case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1010
-        }
1011
-    }
1012
-}
1013
-
1014
-static void horizontal_compose53i(IDWTELEM *b, int width){
1015
-    IDWTELEM temp[width];
1016
-    const int width2= width>>1;
1017
-    const int w2= (width+1)>>1;
1018
-    int x;
1019
-
1020
-    for(x=0; x<width2; x++){
1021
-        temp[2*x    ]= b[x   ];
1022
-        temp[2*x + 1]= b[x+w2];
1023
-    }
1024
-    if(width&1)
1025
-        temp[2*x    ]= b[x   ];
1026
-
1027
-    b[0] = temp[0] - ((temp[1]+1)>>1);
1028
-    for(x=2; x<width-1; x+=2){
1029
-        b[x  ] = temp[x  ] - ((temp[x-1] + temp[x+1]+2)>>2);
1030
-        b[x-1] = temp[x-1] + ((b   [x-2] + b   [x  ]+1)>>1);
1031
-    }
1032
-    if(width&1){
1033
-        b[x  ] = temp[x  ] - ((temp[x-1]+1)>>1);
1034
-        b[x-1] = temp[x-1] + ((b   [x-2] + b  [x  ]+1)>>1);
1035
-    }else
1036
-        b[x-1] = temp[x-1] + b[x-2];
1037
-}
1038
-
1039
-static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1040
-    int i;
1041
-
1042
-    for(i=0; i<width; i++){
1043
-        b1[i] += (b0[i] + b2[i])>>1;
1044
-    }
1045
-}
1046
-
1047
-static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1048
-    int i;
1049
-
1050
-    for(i=0; i<width; i++){
1051
-        b1[i] -= (b0[i] + b2[i] + 2)>>2;
1052
-    }
1053
-}
1054
-
1055
-static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
1056
-    cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1057
-    cs->b1 = slice_buffer_get_line(sb, mirror(-1  , height-1) * stride_line);
1058
-    cs->y = -1;
1059
-}
1060
-
1061
-static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
1062
-    cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1063
-    cs->b1 = buffer + mirror(-1  , height-1)*stride;
1064
-    cs->y = -1;
1065
-}
1066
-
1067
-static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
1068
-    int y= cs->y;
1069
-
1070
-    IDWTELEM *b0= cs->b0;
1071
-    IDWTELEM *b1= cs->b1;
1072
-    IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1073
-    IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
1074
-
1075
-    if(y+1<(unsigned)height && y<(unsigned)height){
1076
-        int x;
1077
-
1078
-        for(x=0; x<width; x++){
1079
-            b2[x] -= (b1[x] + b3[x] + 2)>>2;
1080
-            b1[x] += (b0[x] + b2[x])>>1;
1081
-        }
1082
-    }else{
1083
-        if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1084
-        if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1085
-    }
1086
-
1087
-        if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1088
-        if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1089
-
1090
-    cs->b0 = b2;
1091
-    cs->b1 = b3;
1092
-    cs->y += 2;
1093
-}
1094
-
1095
-static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
1096
-    int y= cs->y;
1097
-    IDWTELEM *b0= cs->b0;
1098
-    IDWTELEM *b1= cs->b1;
1099
-    IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1100
-    IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1101
-
1102
-        if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1103
-        if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1104
-
1105
-        if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1106
-        if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1107
-
1108
-    cs->b0 = b2;
1109
-    cs->b1 = b3;
1110
-    cs->y += 2;
1111
-}
1112
-
1113
-static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
1114
-    DWTCompose cs;
1115
-    spatial_compose53i_init(&cs, buffer, height, stride);
1116
-    while(cs.y <= height)
1117
-        spatial_compose53i_dy(&cs, buffer, width, height, stride);
1118
-}
1119
-
1120
-
1121
-void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
1122
-    IDWTELEM temp[width];
1123
-    const int w2= (width+1)>>1;
1124
-
1125
-#if 0 //maybe more understandable but slower
1126
-    inv_lift (temp   , b      , b   +w2, 2, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
1127
-    inv_lift (temp+1 , b   +w2, temp   , 2, 1, 2, width,  W_CM, W_CO, W_CS, 1, 1);
1128
-
1129
-    inv_liftS(b      , temp   , temp+1 , 2, 2, 2, width,  W_BM, W_BO, W_BS, 0, 1);
1130
-    inv_lift (b+1    , temp+1 , b      , 2, 2, 2, width,  W_AM, W_AO, W_AS, 1, 0);
1131
-#else
1132
-    int x;
1133
-    temp[0] = b[0] - ((3*b[w2]+2)>>2);
1134
-    for(x=1; x<(width>>1); x++){
1135
-        temp[2*x  ] = b[x     ] - ((3*(b   [x+w2-1] + b[x+w2])+4)>>3);
1136
-        temp[2*x-1] = b[x+w2-1] - temp[2*x-2] - temp[2*x];
1137
-    }
1138
-    if(width&1){
1139
-        temp[2*x  ] = b[x     ] - ((3*b   [x+w2-1]+2)>>2);
1140
-        temp[2*x-1] = b[x+w2-1] - temp[2*x-2] - temp[2*x];
1141
-    }else
1142
-        temp[2*x-1] = b[x+w2-1] - 2*temp[2*x-2];
1143
-
1144
-    b[0] = temp[0] + ((2*temp[0] + temp[1]+4)>>3);
1145
-    for(x=2; x<width-1; x+=2){
1146
-        b[x  ] = temp[x  ] + ((4*temp[x  ] + temp[x-1] + temp[x+1]+8)>>4);
1147
-        b[x-1] = temp[x-1] + ((3*(b  [x-2] + b   [x  ] ))>>1);
1148
-    }
1149
-    if(width&1){
1150
-        b[x  ] = temp[x  ] + ((2*temp[x  ] + temp[x-1]+4)>>3);
1151
-        b[x-1] = temp[x-1] + ((3*(b  [x-2] + b   [x  ] ))>>1);
1152
-    }else
1153
-        b[x-1] = temp[x-1] + 3*b [x-2];
1154
-#endif
1155
-}
1156
-
1157
-static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1158
-    int i;
1159
-
1160
-    for(i=0; i<width; i++){
1161
-        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1162
-    }
1163
-}
1164
-
1165
-static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1166
-    int i;
1167
-
1168
-    for(i=0; i<width; i++){
1169
-        b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1170
-    }
1171
-}
1172
-
1173
-static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1174
-    int i;
1175
-
1176
-    for(i=0; i<width; i++){
1177
-#ifdef liftS
1178
-        b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1179
-#else
1180
-        b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1181
-#endif
1182
-    }
1183
-}
1184
-
1185
-static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1186
-    int i;
1187
-
1188
-    for(i=0; i<width; i++){
1189
-        b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1190
-    }
1191
-}
1192
-
1193
-void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
1194
-    int i;
1195
-
1196
-    for(i=0; i<width; i++){
1197
-        b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1198
-        b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1199
-#ifdef liftS
1200
-        b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1201
-#else
1202
-        b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1203
-#endif
1204
-        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1205
-    }
1206
-}
1207
-
1208
-static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
1209
-    cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1210
-    cs->b1 = slice_buffer_get_line(sb, mirror(-3  , height-1) * stride_line);
1211
-    cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1212
-    cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1213
-    cs->y = -3;
1214
-}
1215
-
1216
-static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
1217
-    cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1218
-    cs->b1 = buffer + mirror(-3  , height-1)*stride;
1219
-    cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1220
-    cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1221
-    cs->y = -3;
1222
-}
1223
-
1224
-static void spatial_compose97i_dy_buffered(DSPContext *dsp, DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
1225
-    int y = cs->y;
1226
-
1227
-    IDWTELEM *b0= cs->b0;
1228
-    IDWTELEM *b1= cs->b1;
1229
-    IDWTELEM *b2= cs->b2;
1230
-    IDWTELEM *b3= cs->b3;
1231
-    IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1232
-    IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
1233
-
1234
-    if(y>0 && y+4<height){
1235
-        dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1236
-    }else{
1237
-        if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1238
-        if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1239
-        if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1240
-        if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1241
-    }
1242
-
1243
-    if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1244
-    if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
1245
-
1246
-    cs->b0=b2;
1247
-    cs->b1=b3;
1248
-    cs->b2=b4;
1249
-    cs->b3=b5;
1250
-    cs->y += 2;
1251
-}
1252
-
1253
-static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
1254
-    int y = cs->y;
1255
-    IDWTELEM *b0= cs->b0;
1256
-    IDWTELEM *b1= cs->b1;
1257
-    IDWTELEM *b2= cs->b2;
1258
-    IDWTELEM *b3= cs->b3;
1259
-    IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1260
-    IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1261
-
1262
-    if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1263
-    if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1264
-    if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1265
-    if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1266
-
1267
-    if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1268
-    if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
1269
-
1270
-    cs->b0=b2;
1271
-    cs->b1=b3;
1272
-    cs->b2=b4;
1273
-    cs->b3=b5;
1274
-    cs->y += 2;
1275
-}
1276
-
1277
-static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
1278
-    DWTCompose cs;
1279
-    spatial_compose97i_init(&cs, buffer, height, stride);
1280
-    while(cs.y <= height)
1281
-        spatial_compose97i_dy(&cs, buffer, width, height, stride);
1282
-}
1283
-
1284
-static void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1285
-    int level;
1286
-    for(level=decomposition_count-1; level>=0; level--){
1287
-        switch(type){
1288
-        case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1289
-        case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1290
-        }
1291
-    }
1292
-}
1293
-
1294
-static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, DWTCompose *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1295
-    const int support = type==1 ? 3 : 5;
1296
-    int level;
1297
-    if(type==2) return;
1298
-
1299
-    for(level=decomposition_count-1; level>=0; level--){
1300
-        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1301
-            switch(type){
1302
-            case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1303
-                break;
1304
-            case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1305
-                break;
1306
-            }
1307
-        }
1308
-    }
1309
-}
1310
-
1311 712
 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1312 713
     const int w= b->width;
1313 714
     const int h= b->height;
... ...
@@ -2043,7 +1359,7 @@ static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer
2043 2043
     }
2044 2044
 #else
2045 2045
     if(sliced){
2046
-        s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2046
+        s->dwt.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2047 2047
     }else{
2048 2048
         for(y=0; y<b_h; y++){
2049 2049
             //FIXME ugly misuse of obmc_stride
... ...
@@ -2366,6 +1682,7 @@ static av_cold int common_init(AVCodecContext *avctx){
2366 2366
     s->max_ref_frames=1; //just make sure it's not an invalid value in case of no initial keyframe
2367 2367
 
2368 2368
     dsputil_init(&s->dsp, avctx);
2369
+    ff_dwt_init(&s->dwt);
2369 2370
 
2370 2371
 #define mcf(dx,dy)\
2371 2372
     s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
... ...
@@ -2865,7 +2182,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPac
2865 2865
             }
2866 2866
 
2867 2867
             for(; yd<slice_h; yd+=4){
2868
-                ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
2868
+                ff_spatial_idwt_buffered_slice(&s->dwt, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
2869 2869
             }
2870 2870
 
2871 2871
             if(s->qlog == LOSSLESS_QLOG){
... ...
@@ -3607,41 +2924,6 @@ static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
3607 3607
     return distortion + rate*penalty_factor;
3608 3608
 }
3609 3609
 
3610
-static void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
3611
-    int level;
3612
-    for(level=decomposition_count-1; level>=0; level--){
3613
-        switch(type){
3614
-        case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
3615
-        case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
3616
-        }
3617
-    }
3618
-}
3619
-
3620
-static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
3621
-    const int support = type==1 ? 3 : 5;
3622
-    int level;
3623
-    if(type==2) return;
3624
-
3625
-    for(level=decomposition_count-1; level>=0; level--){
3626
-        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
3627
-            switch(type){
3628
-            case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
3629
-                break;
3630
-            case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
3631
-                break;
3632
-            }
3633
-        }
3634
-    }
3635
-}
3636
-
3637
-static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
3638
-        DWTCompose cs[MAX_DECOMPOSITIONS];
3639
-        int y;
3640
-        ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
3641
-        for(y=0; y<height; y+=4)
3642
-            ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
3643
-}
3644
-
3645 3610
 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
3646 3611
     const int w= b->width;
3647 3612
     const int h= b->height;
... ...
@@ -23,10 +23,10 @@
23 23
 #define AVCODEC_SNOW_H
24 24
 
25 25
 #include "dsputil.h"
26
+#include "dwt.h"
26 27
 
27 28
 #define MID_STATE 128
28 29
 
29
-#define MAX_DECOMPOSITIONS 8
30 30
 #define MAX_PLANES 4
31 31
 #define QSHIFT 5
32 32
 #define QROOT (1<<QSHIFT)
... ...
@@ -37,101 +37,6 @@
37 37
 #define LOG2_OBMC_MAX 8
38 38
 #define OBMC_MAX (1<<(LOG2_OBMC_MAX))
39 39
 
40
-#define DWT_97 0
41
-#define DWT_53 1
42
-
43
-/** Used to minimize the amount of memory used in order to optimize cache performance. **/
44
-struct slice_buffer_s {
45
-    IDWTELEM * * line; ///< For use by idwt and predict_slices.
46
-    IDWTELEM * * data_stack; ///< Used for internal purposes.
47
-    int data_stack_top;
48
-    int line_count;
49
-    int line_width;
50
-    int data_count;
51
-    IDWTELEM * base_buffer; ///< Buffer that this structure is caching.
52
-};
53
-
54
-#define liftS lift
55
-#if 1
56
-#define W_AM 3
57
-#define W_AO 0
58
-#define W_AS 1
59
-
60
-#undef liftS
61
-#define W_BM 1
62
-#define W_BO 8
63
-#define W_BS 4
64
-
65
-#define W_CM 1
66
-#define W_CO 0
67
-#define W_CS 0
68
-
69
-#define W_DM 3
70
-#define W_DO 4
71
-#define W_DS 3
72
-#elif 0
73
-#define W_AM 55
74
-#define W_AO 16
75
-#define W_AS 5
76
-
77
-#define W_BM 3
78
-#define W_BO 32
79
-#define W_BS 6
80
-
81
-#define W_CM 127
82
-#define W_CO 64
83
-#define W_CS 7
84
-
85
-#define W_DM 7
86
-#define W_DO 8
87
-#define W_DS 4
88
-#elif 0
89
-#define W_AM 97
90
-#define W_AO 32
91
-#define W_AS 6
92
-
93
-#define W_BM 63
94
-#define W_BO 512
95
-#define W_BS 10
96
-
97
-#define W_CM 13
98
-#define W_CO 8
99
-#define W_CS 4
100
-
101
-#define W_DM 15
102
-#define W_DO 16
103
-#define W_DS 5
104
-
105
-#else
106
-
107
-#define W_AM 203
108
-#define W_AO 64
109
-#define W_AS 7
110
-
111
-#define W_BM 217
112
-#define W_BO 2048
113
-#define W_BS 12
114
-
115
-#define W_CM 113
116
-#define W_CO 64
117
-#define W_CS 7
118
-
119
-#define W_DM 227
120
-#define W_DO 128
121
-#define W_DS 9
122
-#endif
123
-
124
-void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);
125
-void ff_snow_horizontal_compose97i(IDWTELEM *b, int width);
126
-void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
127
-
128
-#if CONFIG_SNOW_ENCODER
129
-int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h);
130
-int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h);
131
-#endif
132
-
133
-void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count);
134
-
135 40
 /* C bits used by mmx/sse2/altivec */
136 41
 
137 42
 static av_always_inline void snow_interleave_line_header(int * i, int width, IDWTELEM * low, IDWTELEM * high){
... ...
@@ -2894,25 +2894,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2894 2894
         }
2895 2895
 #endif
2896 2896
 
2897
-#if CONFIG_SNOW_DECODER
2898
-        if(mm_flags & FF_MM_SSE2 & 0){
2899
-            c->horizontal_compose97i = ff_snow_horizontal_compose97i_sse2;
2900
-#if HAVE_7REGS
2901
-            c->vertical_compose97i = ff_snow_vertical_compose97i_sse2;
2902
-#endif
2903
-            c->inner_add_yblock = ff_snow_inner_add_yblock_sse2;
2904
-        }
2905
-        else{
2906
-            if(mm_flags & FF_MM_MMX2){
2907
-            c->horizontal_compose97i = ff_snow_horizontal_compose97i_mmx;
2908
-#if HAVE_7REGS
2909
-            c->vertical_compose97i = ff_snow_vertical_compose97i_mmx;
2910
-#endif
2911
-            }
2912
-            c->inner_add_yblock = ff_snow_inner_add_yblock_mmx;
2913
-        }
2914
-#endif
2915
-
2916 2897
         if(mm_flags & FF_MM_3DNOW){
2917 2898
             c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow;
2918 2899
             c->vector_fmul = vector_fmul_3dnow;
... ...
@@ -167,15 +167,6 @@ void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx);
167 167
 void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd);
168 168
 void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd);
169 169
 
170
-void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width);
171
-void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width);
172
-void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);
173
-void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);
174
-void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
175
-                                   int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
176
-void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
177
-                                  int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
178
-
179 170
 void ff_lpc_compute_autocorr_sse2(const int32_t *data, int len, int lag,
180 171
                                    double *autoc);
181 172
 
... ...
@@ -22,9 +22,10 @@
22 22
 #include "libavutil/x86_cpu.h"
23 23
 #include "libavcodec/avcodec.h"
24 24
 #include "libavcodec/snow.h"
25
+#include "libavcodec/dwt.h"
25 26
 #include "dsputil_mmx.h"
26 27
 
27
-void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){
28
+static void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){
28 29
     const int w2= (width+1)>>1;
29 30
     DECLARE_ALIGNED(16, IDWTELEM, temp)[width>>1];
30 31
     const int w_l= (width>>1);
... ...
@@ -213,7 +214,7 @@ void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){
213 213
     }
214 214
 }
215 215
 
216
-void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){
216
+static void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){
217 217
     const int w2= (width+1)>>1;
218 218
     IDWTELEM temp[width >> 1];
219 219
     const int w_l= (width>>1);
... ...
@@ -436,7 +437,7 @@ void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){
436 436
         "movdqa %%"s2", %%"t2" \n\t"\
437 437
         "movdqa %%"s3", %%"t3" \n\t"
438 438
 
439
-void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
439
+static void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
440 440
     x86_reg i = width;
441 441
 
442 442
     while(i & 0x1F)
... ...
@@ -534,7 +535,7 @@ void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
534 534
         "movq %%"s3", %%"t3" \n\t"
535 535
 
536 536
 
537
-void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
537
+static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
538 538
     x86_reg i = width;
539 539
     while(i & 15)
540 540
     {
... ...
@@ -847,7 +848,7 @@ snow_inner_add_yblock_mmx_mix("16", "8")
847 847
 snow_inner_add_yblock_mmx_end("32")
848 848
 }
849 849
 
850
-void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
850
+static void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
851 851
                            int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
852 852
 
853 853
     if (b_w == 16)
... ...
@@ -861,7 +862,7 @@ void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, u
861 861
          ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
862 862
 }
863 863
 
864
-void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
864
+static void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
865 865
                           int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
866 866
     if (b_w == 16)
867 867
         inner_add_yblock_bw_16_obmc_32_mmx(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
... ...
@@ -870,3 +871,27 @@ void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, ui
870 870
     else
871 871
         ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
872 872
 }
873
+
874
+void ff_dwt_init_x86(DWTContext *c)
875
+{
876
+    mm_flags = mm_support();
877
+
878
+    if (mm_flags & FF_MM_MMX) {
879
+        if(mm_flags & FF_MM_SSE2 & 0){
880
+            c->horizontal_compose97i = ff_snow_horizontal_compose97i_sse2;
881
+#if HAVE_7REGS
882
+            c->vertical_compose97i = ff_snow_vertical_compose97i_sse2;
883
+#endif
884
+            c->inner_add_yblock = ff_snow_inner_add_yblock_sse2;
885
+        }
886
+        else{
887
+            if(mm_flags & FF_MM_MMX2){
888
+            c->horizontal_compose97i = ff_snow_horizontal_compose97i_mmx;
889
+#if HAVE_7REGS
890
+            c->vertical_compose97i = ff_snow_vertical_compose97i_mmx;
891
+#endif
892
+            }
893
+            c->inner_add_yblock = ff_snow_inner_add_yblock_mmx;
894
+        }
895
+    }
896
+}