Browse code

new files for the CAVS decoder by (Stefan Gehrer <stefan gehrer gmx de)

Originally committed as revision 5567 to svn://svn.ffmpeg.org/ffmpeg/trunk

Michael Niedermayer authored on 2006/07/02 07:52:56
Showing 4 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,1550 @@
0
+/*
1
+ * Chinese AVS video (AVS1-P2, JiZhun profile) decoder.
2
+ * Copyright (c) 2006  Stefan Gehrer <stefan.gehrer@gmx.de>
3
+ *
4
+ * This library is free software; you can redistribute it and/or
5
+ * modify it under the terms of the GNU Lesser General Public
6
+ * License as published by the Free Software Foundation; either
7
+ * version 2 of the License, or (at your option) any later version.
8
+ *
9
+ * This library is distributed in the hope that it will be useful,
10
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
+ * Lesser General Public License for more details.
13
+ *
14
+ * You should have received a copy of the GNU Lesser General Public
15
+ * License along with this library; if not, write to the Free Software
16
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17
+ */
18
+
19
+#include "avcodec.h"
20
+#include "bitstream.h"
21
+#include "golomb.h"
22
+#include "mpegvideo.h"
23
+#include "cavsdata.h"
24
+
25
+typedef struct {
26
+    MpegEncContext s;
27
+    Picture picture; //currently decoded frame
28
+    Picture DPB[2];  //reference frames
29
+    int dist[2];     //temporal distances from current frame to ref frames
30
+    int profile, level;
31
+    int aspect_ratio;
32
+    int mb_width, mb_height;
33
+    int pic_type;
34
+    int progressive;
35
+    int pic_structure;
36
+    int skip_mode_flag;
37
+    int loop_filter_disable;
38
+    int alpha_offset, beta_offset;
39
+    int ref_flag;
40
+    int mbx, mby;
41
+    int flags;
42
+    int stc;
43
+    uint8_t *cy, *cu, *cv;
44
+    int left_qp;
45
+    uint8_t *top_qp;
46
+
47
+    /* mv motion vector cache
48
+       0:    D3  B2  B3  C2
49
+       4:    A1  X0  X1   -
50
+       8:    A3  X2  X3   -
51
+
52
+       X are the vectors in the current macroblock (5,6,9,10)
53
+       A is the macroblock to the left (4,8)
54
+       B is the macroblock to the top (1,2)
55
+       C is the macroblock to the top-right (3)
56
+       D is the macroblock to the top-left (0)
57
+
58
+       the same is repeated for backward motion vectors */
59
+    vector_t mv[2*4*3];
60
+    vector_t *top_mv[2];
61
+    vector_t *col_mv;
62
+
63
+    /* luma pred mode cache
64
+       0:    --  B2  B3
65
+       3:    A1  X0  X1
66
+       6:    A3  X2  X3   */
67
+    int pred_mode_Y[3*3];
68
+    int *top_pred_Y;
69
+    int l_stride, c_stride;
70
+    int luma_scan[4];
71
+    int qp;
72
+    int qp_fixed;
73
+    int cbp;
74
+
75
+    /* intra prediction is done with un-deblocked samples
76
+     they are saved here before deblocking the MB  */
77
+    uint8_t *top_border_y, *top_border_u, *top_border_v;
78
+    uint8_t left_border_y[16], left_border_u[8], left_border_v[8];
79
+    uint8_t topleft_border_y, topleft_border_u, topleft_border_v;
80
+
81
+    void (*intra_pred_l[8])(uint8_t *d,uint8_t *top,uint8_t *left,int stride);
82
+    void (*intra_pred_c[7])(uint8_t *d,uint8_t *top,uint8_t *left,int stride);
83
+    uint8_t *col_type_base;
84
+    uint8_t *col_type;
85
+    int sym_factor;
86
+    int direct_den[2];
87
+    int scale_den[2];
88
+    int got_keyframe;
89
+} AVSContext;
90
+
91
+/*****************************************************************************
92
+ *
93
+ * in-loop deblocking filter
94
+ *
95
+ ****************************************************************************/
96
+
97
+static inline int get_bs_p(vector_t *mvP, vector_t *mvQ) {
98
+    if((mvP->ref == REF_INTRA) || (mvQ->ref == REF_INTRA))
99
+        return 2;
100
+    if(mvP->ref != mvQ->ref)
101
+        return 1;
102
+    if( (abs(mvP->x - mvQ->x) >= 4) ||  (abs(mvP->y - mvQ->y) >= 4) )
103
+        return 1;
104
+    return 0;
105
+}
106
+
107
+static inline int get_bs_b(vector_t *mvP, vector_t *mvQ) {
108
+    if((mvP->ref == REF_INTRA) || (mvQ->ref == REF_INTRA)) {
109
+        return 2;
110
+    } else {
111
+        vector_t *mvPbw = mvP + MV_BWD_OFFS;
112
+        vector_t *mvQbw = mvQ + MV_BWD_OFFS;
113
+        if( (abs(  mvP->x -   mvQ->x) >= 4) ||
114
+            (abs(  mvP->y -   mvQ->y) >= 4) ||
115
+            (abs(mvPbw->x - mvQbw->x) >= 4) ||
116
+            (abs(mvPbw->y - mvQbw->y) >= 4) )
117
+            return 1;
118
+    }
119
+    return 0;
120
+}
121
+
122
+/* boundary strength (bs) mapping:
123
+ *
124
+ * --4---5--
125
+ * 0   2   |
126
+ * | 6 | 7 |
127
+ * 1   3   |
128
+ * ---------
129
+ *
130
+ */
131
+
132
+#define SET_PARAMS                                            \
133
+    alpha = alpha_tab[clip(qp_avg + h->alpha_offset,0,63)];   \
134
+    beta  =  beta_tab[clip(qp_avg + h->beta_offset, 0,63)];   \
135
+    tc    =    tc_tab[clip(qp_avg + h->alpha_offset,0,63)];
136
+
137
+static void filter_mb(AVSContext *h, enum mb_t mb_type) {
138
+    uint8_t bs[8];
139
+    int qp_avg, alpha, beta, tc;
140
+    int i;
141
+
142
+    /* save un-deblocked lines */
143
+    h->topleft_border_y = h->top_border_y[h->mbx*16+15];
144
+    h->topleft_border_u = h->top_border_u[h->mbx*8+7];
145
+    h->topleft_border_v = h->top_border_v[h->mbx*8+7];
146
+    memcpy(&h->top_border_y[h->mbx*16], h->cy + 15* h->l_stride,16);
147
+    memcpy(&h->top_border_u[h->mbx* 8], h->cu +  7* h->c_stride,8);
148
+    memcpy(&h->top_border_v[h->mbx* 8], h->cv +  7* h->c_stride,8);
149
+    for(i=0;i<8;i++) {
150
+        h->left_border_y[i*2+0] = *(h->cy + 15 + (i*2+0)*h->l_stride);
151
+        h->left_border_y[i*2+1] = *(h->cy + 15 + (i*2+1)*h->l_stride);
152
+        h->left_border_u[i] = *(h->cu + 7 + i*h->c_stride);
153
+        h->left_border_v[i] = *(h->cv + 7 + i*h->c_stride);
154
+    }
155
+    if(!h->loop_filter_disable) {
156
+        /* clear bs */
157
+        *((uint64_t *)bs) = 0;
158
+        /* determine bs */
159
+        switch(mb_type) {
160
+        case I_8X8:
161
+            *((uint64_t *)bs) = 0x0202020202020202ULL;
162
+            break;
163
+        case P_8X8:
164
+        case P_8X16:
165
+            bs[2] = get_bs_p(&h->mv[MV_FWD_X0], &h->mv[MV_FWD_X1]);
166
+            bs[3] = get_bs_p(&h->mv[MV_FWD_X2], &h->mv[MV_FWD_X3]);
167
+        case P_16X8:
168
+            bs[6] = get_bs_p(&h->mv[MV_FWD_X0], &h->mv[MV_FWD_X2]);
169
+            bs[7] = get_bs_p(&h->mv[MV_FWD_X1], &h->mv[MV_FWD_X3]);
170
+        case P_16X16:
171
+        case P_SKIP:
172
+            bs[0] = get_bs_p(&h->mv[MV_FWD_A1], &h->mv[MV_FWD_X0]);
173
+            bs[1] = get_bs_p(&h->mv[MV_FWD_A3], &h->mv[MV_FWD_X2]);
174
+            bs[4] = get_bs_p(&h->mv[MV_FWD_B2], &h->mv[MV_FWD_X0]);
175
+            bs[5] = get_bs_p(&h->mv[MV_FWD_B3], &h->mv[MV_FWD_X1]);
176
+            break;
177
+        case B_SKIP:
178
+        case B_DIRECT:
179
+        case B_8X8:
180
+            bs[2] = get_bs_b(&h->mv[MV_FWD_X0], &h->mv[MV_FWD_X1]);
181
+            bs[3] = get_bs_b(&h->mv[MV_FWD_X2], &h->mv[MV_FWD_X3]);
182
+            bs[6] = get_bs_b(&h->mv[MV_FWD_X0], &h->mv[MV_FWD_X2]);
183
+            bs[7] = get_bs_b(&h->mv[MV_FWD_X1], &h->mv[MV_FWD_X3]);
184
+        case B_FWD_16X16:
185
+        case B_BWD_16X16:
186
+        case B_SYM_16X16:
187
+            bs[0] = get_bs_b(&h->mv[MV_FWD_A1], &h->mv[MV_FWD_X0]);
188
+            bs[1] = get_bs_b(&h->mv[MV_FWD_A3], &h->mv[MV_FWD_X2]);
189
+            bs[4] = get_bs_b(&h->mv[MV_FWD_B2], &h->mv[MV_FWD_X0]);
190
+            bs[5] = get_bs_b(&h->mv[MV_FWD_B3], &h->mv[MV_FWD_X1]);
191
+            break;
192
+        default:
193
+            if(mb_type & 1) { //16X8
194
+                bs[6] = bs[7] = get_bs_b(&h->mv[MV_FWD_X0], &h->mv[MV_FWD_X2]);
195
+            } else {          //8X16
196
+                bs[2] = bs[3] = get_bs_b(&h->mv[MV_FWD_X0], &h->mv[MV_FWD_X1]);
197
+            }
198
+            bs[0] = get_bs_b(&h->mv[MV_FWD_A1], &h->mv[MV_FWD_X0]);
199
+            bs[1] = get_bs_b(&h->mv[MV_FWD_A3], &h->mv[MV_FWD_X2]);
200
+            bs[4] = get_bs_b(&h->mv[MV_FWD_B2], &h->mv[MV_FWD_X0]);
201
+            bs[5] = get_bs_b(&h->mv[MV_FWD_B3], &h->mv[MV_FWD_X1]);
202
+        }
203
+        if( *((uint64_t *)bs) ) {
204
+            if(h->flags & A_AVAIL) {
205
+                qp_avg = (h->qp + h->left_qp + 1) >> 1;
206
+                SET_PARAMS;
207
+                h->s.dsp.cavs_filter_lv(h->cy,h->l_stride,alpha,beta,tc,bs[0],bs[1]);
208
+                h->s.dsp.cavs_filter_cv(h->cu,h->c_stride,alpha,beta,tc,bs[0],bs[1]);
209
+                h->s.dsp.cavs_filter_cv(h->cv,h->c_stride,alpha,beta,tc,bs[0],bs[1]);
210
+            }
211
+            qp_avg = h->qp;
212
+            SET_PARAMS;
213
+            h->s.dsp.cavs_filter_lv(h->cy + 8,h->l_stride,alpha,beta,tc,bs[2],bs[3]);
214
+            h->s.dsp.cavs_filter_lh(h->cy + 8*h->l_stride,h->l_stride,alpha,beta,tc,
215
+                           bs[6],bs[7]);
216
+
217
+            if(h->flags & B_AVAIL) {
218
+                qp_avg = (h->qp + h->top_qp[h->mbx] + 1) >> 1;
219
+                SET_PARAMS;
220
+                h->s.dsp.cavs_filter_lh(h->cy,h->l_stride,alpha,beta,tc,bs[4],bs[5]);
221
+                h->s.dsp.cavs_filter_ch(h->cu,h->c_stride,alpha,beta,tc,bs[4],bs[5]);
222
+                h->s.dsp.cavs_filter_ch(h->cv,h->c_stride,alpha,beta,tc,bs[4],bs[5]);
223
+            }
224
+        }
225
+    }
226
+    h->left_qp = h->qp;
227
+    h->top_qp[h->mbx] = h->qp;
228
+}
229
+
230
+#undef SET_PARAMS
231
+
232
+/*****************************************************************************
233
+ *
234
+ * spatial intra prediction
235
+ *
236
+ ****************************************************************************/
237
+
238
+static inline void load_intra_pred_luma(AVSContext *h, uint8_t *top,
239
+                                        uint8_t *left, int block) {
240
+    int i;
241
+
242
+    switch(block) {
243
+    case 0:
244
+        memcpy(&left[1],h->left_border_y,16);
245
+        left[0] = left[1];
246
+        left[17] = left[16];
247
+        memcpy(&top[1],&h->top_border_y[h->mbx*16],16);
248
+        top[17] = top[16];
249
+        top[0] = top[1];
250
+        if((h->flags & A_AVAIL) && (h->flags & B_AVAIL))
251
+            left[0] = top[0] = h->topleft_border_y;
252
+        break;
253
+    case 1:
254
+        for(i=0;i<8;i++)
255
+            left[i+1] = *(h->cy + 7 + i*h->l_stride);
256
+        memset(&left[9],left[8],9);
257
+        left[0] = left[1];
258
+        memcpy(&top[1],&h->top_border_y[h->mbx*16+8],8);
259
+        if(h->flags & C_AVAIL)
260
+            memcpy(&top[9],&h->top_border_y[(h->mbx + 1)*16],8);
261
+        else
262
+            memset(&top[9],top[8],9);
263
+        top[17] = top[16];
264
+        top[0] = top[1];
265
+        if(h->flags & B_AVAIL)
266
+            left[0] = top[0] = h->top_border_y[h->mbx*16+7];
267
+        break;
268
+    case 2:
269
+        memcpy(&left[1],&h->left_border_y[8],8);
270
+        memset(&left[9],left[8],9);
271
+        memcpy(&top[1],h->cy + 7*h->l_stride,16);
272
+        top[17] = top[16];
273
+        left[0] = h->left_border_y[7];
274
+        top[0] = top[1];
275
+        if(h->flags & A_AVAIL)
276
+            top[0] = left[0];
277
+        break;
278
+    case 3:
279
+        for(i=0;i<9;i++)
280
+            left[i] = *(h->cy + 7 + (i+7)*h->l_stride);
281
+        memset(&left[9],left[8],9);
282
+        memcpy(&top[0],h->cy + 7 + 7*h->l_stride,9);
283
+        memset(&top[9],top[8],9);
284
+        break;
285
+    }
286
+}
287
+
288
+static inline void load_intra_pred_chroma(uint8_t *stop, uint8_t *sleft,
289
+                                          uint8_t stopleft, uint8_t *dtop,
290
+                                          uint8_t *dleft, int stride, int flags) {
291
+    int i;
292
+
293
+    if(flags & A_AVAIL) {
294
+        for(i=0; i<8; i++)
295
+            dleft[i+1] = sleft[i];
296
+        dleft[0] = dleft[1];
297
+        dleft[9] = dleft[8];
298
+    }
299
+    if(flags & B_AVAIL) {
300
+        for(i=0; i<8; i++)
301
+            dtop[i+1] = stop[i];
302
+        dtop[0] = dtop[1];
303
+        dtop[9] = dtop[8];
304
+        if(flags & A_AVAIL)
305
+            dleft[0] = dtop[0] = stopleft;
306
+    }
307
+}
308
+
309
/**
 * Vertical intra prediction: every row of the 8x8 block becomes a copy of
 * top[1..8].
 *
 * @param d      destination block
 * @param top    neighbouring samples above, top[0] being the corner
 * @param left   not used
 * @param stride distance in bytes between destination rows
 */
static void intra_pred_vert(uint8_t *d,uint8_t *top,uint8_t *left,int stride) {
    int y;
    uint8_t row[8];

    (void)left;
    /* &top[1] is generally not 8-byte aligned, so copy bytes instead of
       loading through a uint64_t pointer */
    memcpy(row, &top[1], sizeof(row));
    for(y=0;y<8;y++)
        memcpy(d + y*stride, row, sizeof(row));
}
316
+
317
/**
 * Horizontal intra prediction: row y of the 8x8 block is filled with
 * left[y+1].
 *
 * @param d      destination block
 * @param top    not used
 * @param left   neighbouring samples to the left, left[0] being the corner
 * @param stride distance in bytes between destination rows
 */
static void intra_pred_horiz(uint8_t *d,uint8_t *top,uint8_t *left,int stride) {
    int y;

    (void)top;
    /* bytewise fill; the destination is not accessed as a 64 bit integer */
    for(y=0;y<8;y++)
        memset(d + y*stride, left[y+1], 8);
}
325
+
326
/**
 * DC intra prediction without neighbours: the whole 8x8 block is set to 128.
 *
 * @param d      destination block
 * @param top    not used
 * @param left   not used
 * @param stride distance in bytes between destination rows
 */
static void intra_pred_dc_128(uint8_t *d,uint8_t *top,uint8_t *left,int stride) {
    int y;

    (void)top;
    (void)left;
    /* bytewise fill; the destination is not accessed as a 64 bit integer */
    for(y=0;y<8;y++)
        memset(d + y*stride, 0x80, 8);
}
332
+
333
+static void intra_pred_plane(uint8_t *d,uint8_t *top,uint8_t *left,int stride) {
334
+    int x,y,ia;
335
+    int ih = 0;
336
+    int iv = 0;
337
+    uint8_t *cm = cropTbl + MAX_NEG_CROP;
338
+
339
+    for(x=0; x<4; x++) {
340
+        ih += (x+1)*(top[5+x]-top[3-x]);
341
+        iv += (x+1)*(left[5+x]-left[3-x]);
342
+    }
343
+    ia = (top[8]+left[8])<<4;
344
+    ih = (17*ih+16)>>5;
345
+    iv = (17*iv+16)>>5;
346
+    for(y=0; y<8; y++)
347
+        for(x=0; x<8; x++)
348
+            d[y*stride+x] = cm[(ia+(x-3)*ih+(y-3)*iv+16)>>5];
349
+}
350
+
351
+#define LOWPASS(ARRAY,INDEX)                                            \
352
+    (( ARRAY[(INDEX)-1] + 2*ARRAY[(INDEX)] + ARRAY[(INDEX)+1] + 2) >> 2)
353
+
354
/**
 * Low-pass intra prediction: each sample is the mean of the (1,2,1)/4
 * filtered neighbour above its column and the filtered neighbour left of
 * its row.
 *
 * @param d      destination 8x8 block
 * @param top    neighbouring samples above (indices 0..9 are read)
 * @param left   neighbouring samples to the left (indices 0..9 are read)
 * @param stride distance in bytes between destination rows
 */
static void intra_pred_lp(uint8_t *d,uint8_t *top,uint8_t *left,int stride) {
    int col, row;

    for(row=0; row<8; row++) {
        uint8_t *line = d + row*stride;
        /* filtered left neighbour, centred on left[row+1] */
        const int from_left = (left[row] + 2*left[row+1] + left[row+2] + 2) >> 2;

        for(col=0; col<8; col++) {
            /* filtered top neighbour, centred on top[col+1] */
            const int from_top = (top[col] + 2*top[col+1] + top[col+2] + 2) >> 2;
            line[col] = (from_top + from_left) >> 1;
        }
    }
}
360
+
361
/**
 * Down-left diagonal intra prediction: the sample at (x,y) is the mean of
 * the (1,2,1)/4 filtered top and left neighbours centred on index x+y+2.
 *
 * @param d      destination 8x8 block
 * @param top    neighbouring samples above (indices 1..17 are read)
 * @param left   neighbouring samples to the left (indices 1..17 are read)
 * @param stride distance in bytes between destination rows
 */
static void intra_pred_down_left(uint8_t *d,uint8_t *top,uint8_t *left,int stride) {
    int col, row;

    for(row=0; row<8; row++) {
        uint8_t *line = d + row*stride;

        for(col=0; col<8; col++) {
            const int c = col + row + 2;   /* centre tap of both filters */
            const int from_top  = (top[c-1]  + 2*top[c]  + top[c+1]  + 2) >> 2;
            const int from_left = (left[c-1] + 2*left[c] + left[c+1] + 2) >> 2;
            line[col] = (from_top + from_left) >> 1;
        }
    }
}
367
+
368
/**
 * Down-right diagonal intra prediction. Samples above the main diagonal use
 * the (1,2,1)/4 filtered top neighbours, samples below it the filtered left
 * neighbours, and the diagonal itself is filtered across the corner.
 *
 * @param d      destination 8x8 block
 * @param top    neighbouring samples above (indices 0..8 are read)
 * @param left   neighbouring samples to the left (indices 0..8 are read)
 * @param stride distance in bytes between destination rows
 */
static void intra_pred_down_right(uint8_t *d,uint8_t *top,uint8_t *left,int stride) {
    int col, row;

    for(row=0; row<8; row++) {
        uint8_t *line = d + row*stride;

        for(col=0; col<8; col++) {
            const int off = col - row;

            if(off > 0)        /* right of the diagonal: taken from the top */
                line[col] = (top[off-1] + 2*top[off] + top[off+1] + 2) >> 2;
            else if(off < 0)   /* below the diagonal: taken from the left */
                line[col] = (left[-off-1] + 2*left[-off] + left[-off+1] + 2) >> 2;
            else               /* on the diagonal: corner sample in the middle */
                line[col] = (left[1]+2*top[0]+top[1]+2)>>2;
        }
    }
}
379
+
380
/**
 * Low-pass intra prediction from the left only: row y is filled with the
 * (1,2,1)/4 filtered left neighbour centred on left[y+1].
 *
 * @param d      destination 8x8 block
 * @param top    not used
 * @param left   neighbouring samples to the left (indices 0..9 are read)
 * @param stride distance in bytes between destination rows
 */
static void intra_pred_lp_left(uint8_t *d,uint8_t *top,uint8_t *left,int stride) {
    int col, row;

    for(row=0; row<8; row++) {
        uint8_t *line = d + row*stride;
        const int v = (left[row] + 2*left[row+1] + left[row+2] + 2) >> 2;

        for(col=0; col<8; col++)
            line[col] = v;
    }
}
386
+
387
/**
 * Low-pass intra prediction from the top only: column x is filled with the
 * (1,2,1)/4 filtered top neighbour centred on top[x+1].
 *
 * @param d      destination 8x8 block
 * @param top    neighbouring samples above (indices 0..9 are read)
 * @param left   not used
 * @param stride distance in bytes between destination rows
 */
static void intra_pred_lp_top(uint8_t *d,uint8_t *top,uint8_t *left,int stride) {
    int col, row;

    for(row=0; row<8; row++) {
        uint8_t *line = d + row*stride;

        for(col=0; col<8; col++)
            line[col] = (top[col] + 2*top[col+1] + top[col+2] + 2) >> 2;
    }
}
393
+
394
+#undef LOWPASS
395
+
396
+static inline void modify_pred(const int8_t *mod_table, int *mode) {
397
+    int newmode = mod_table[(int)*mode];
398
+    if(newmode < 0) {
399
+        av_log(NULL, AV_LOG_ERROR, "Illegal intra prediction mode\n");
400
+        *mode = 0;
401
+    } else {
402
+        *mode = newmode;
403
+    }
404
+}
405
+
406
+/*****************************************************************************
407
+ *
408
+ * motion compensation
409
+ *
410
+ ****************************************************************************/
411
+
412
+static inline void mc_dir_part(AVSContext *h,Picture *pic,int square,
413
+                        int chroma_height,int delta,int list,uint8_t *dest_y,
414
+                        uint8_t *dest_cb,uint8_t *dest_cr,int src_x_offset,
415
+                        int src_y_offset,qpel_mc_func *qpix_op,
416
+                        h264_chroma_mc_func chroma_op,vector_t *mv){
417
+    MpegEncContext * const s = &h->s;
418
+    const int mx= mv->x + src_x_offset*8;
419
+    const int my= mv->y + src_y_offset*8;
420
+    const int luma_xy= (mx&3) + ((my&3)<<2);
421
+    uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->l_stride;
422
+    uint8_t * src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->c_stride;
423
+    uint8_t * src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->c_stride;
424
+    int extra_width= 0; //(s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
425
+    int extra_height= extra_width;
426
+    int emu=0;
427
+    const int full_mx= mx>>2;
428
+    const int full_my= my>>2;
429
+    const int pic_width  = 16*h->mb_width;
430
+    const int pic_height = 16*h->mb_height;
431
+
432
+    if(!pic->data[0])
433
+        return;
434
+    if(mx&7) extra_width -= 3;
435
+    if(my&7) extra_height -= 3;
436
+
437
+    if(   full_mx < 0-extra_width
438
+          || full_my < 0-extra_height
439
+          || full_mx + 16/*FIXME*/ > pic_width + extra_width
440
+          || full_my + 16/*FIXME*/ > pic_height + extra_height){
441
+        ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->l_stride, h->l_stride,
442
+                            16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
443
+        src_y= s->edge_emu_buffer + 2 + 2*h->l_stride;
444
+        emu=1;
445
+    }
446
+
447
+    qpix_op[luma_xy](dest_y, src_y, h->l_stride); //FIXME try variable height perhaps?
448
+    if(!square){
449
+        qpix_op[luma_xy](dest_y + delta, src_y + delta, h->l_stride);
450
+    }
451
+
452
+    if(emu){
453
+        ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->c_stride,
454
+                            9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
455
+        src_cb= s->edge_emu_buffer;
456
+    }
457
+    chroma_op(dest_cb, src_cb, h->c_stride, chroma_height, mx&7, my&7);
458
+
459
+    if(emu){
460
+        ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->c_stride,
461
+                            9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
462
+        src_cr= s->edge_emu_buffer;
463
+    }
464
+    chroma_op(dest_cr, src_cr, h->c_stride, chroma_height, mx&7, my&7);
465
+}
466
+
467
+static inline void mc_part_std(AVSContext *h,int square,int chroma_height,int delta,
468
+                        uint8_t *dest_y,uint8_t *dest_cb,uint8_t *dest_cr,
469
+                        int x_offset, int y_offset,qpel_mc_func *qpix_put,
470
+                        h264_chroma_mc_func chroma_put,qpel_mc_func *qpix_avg,
471
+                        h264_chroma_mc_func chroma_avg, vector_t *mv){
472
+    qpel_mc_func *qpix_op=  qpix_put;
473
+    h264_chroma_mc_func chroma_op= chroma_put;
474
+
475
+    dest_y  += 2*x_offset + 2*y_offset*h->l_stride;
476
+    dest_cb +=   x_offset +   y_offset*h->c_stride;
477
+    dest_cr +=   x_offset +   y_offset*h->c_stride;
478
+    x_offset += 8*h->mbx;
479
+    y_offset += 8*h->mby;
480
+
481
+    if(mv->ref >= 0){
482
+        Picture *ref= &h->DPB[mv->ref];
483
+        mc_dir_part(h, ref, square, chroma_height, delta, 0,
484
+                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
485
+                    qpix_op, chroma_op, mv);
486
+
487
+        qpix_op=  qpix_avg;
488
+        chroma_op= chroma_avg;
489
+    }
490
+
491
+    if((mv+MV_BWD_OFFS)->ref >= 0){
492
+        Picture *ref= &h->DPB[0];
493
+        mc_dir_part(h, ref, square, chroma_height, delta, 1,
494
+                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
495
+                    qpix_op, chroma_op, mv+MV_BWD_OFFS);
496
+    }
497
+}
498
+
499
+static void inter_pred(AVSContext *h) {
500
+    /* always do 8x8 blocks TODO: are larger blocks worth it? */
501
+    mc_part_std(h, 1, 4, 0, h->cy, h->cu, h->cv, 0, 0,
502
+                h->s.dsp.put_cavs_qpel_pixels_tab[1],
503
+                h->s.dsp.put_h264_chroma_pixels_tab[1],
504
+                h->s.dsp.avg_cavs_qpel_pixels_tab[1],
505
+                h->s.dsp.avg_h264_chroma_pixels_tab[1],&h->mv[MV_FWD_X0]);
506
+    mc_part_std(h, 1, 4, 0, h->cy, h->cu, h->cv, 4, 0,
507
+                h->s.dsp.put_cavs_qpel_pixels_tab[1],
508
+                h->s.dsp.put_h264_chroma_pixels_tab[1],
509
+                h->s.dsp.avg_cavs_qpel_pixels_tab[1],
510
+                h->s.dsp.avg_h264_chroma_pixels_tab[1],&h->mv[MV_FWD_X1]);
511
+    mc_part_std(h, 1, 4, 0, h->cy, h->cu, h->cv, 0, 4,
512
+                h->s.dsp.put_cavs_qpel_pixels_tab[1],
513
+                h->s.dsp.put_h264_chroma_pixels_tab[1],
514
+                h->s.dsp.avg_cavs_qpel_pixels_tab[1],
515
+                h->s.dsp.avg_h264_chroma_pixels_tab[1],&h->mv[MV_FWD_X2]);
516
+    mc_part_std(h, 1, 4, 0, h->cy, h->cu, h->cv, 4, 4,
517
+                h->s.dsp.put_cavs_qpel_pixels_tab[1],
518
+                h->s.dsp.put_h264_chroma_pixels_tab[1],
519
+                h->s.dsp.avg_cavs_qpel_pixels_tab[1],
520
+                h->s.dsp.avg_h264_chroma_pixels_tab[1],&h->mv[MV_FWD_X3]);
521
+    /* set intra prediction modes to default values */
522
+    h->pred_mode_Y[3] =  h->pred_mode_Y[6] = INTRA_L_LP;
523
+    h->top_pred_Y[h->mbx*2+0] = h->top_pred_Y[h->mbx*2+1] = INTRA_L_LP;
524
+}
525
+
526
+/*****************************************************************************
527
+ *
528
+ * motion vector prediction
529
+ *
530
+ ****************************************************************************/
531
+
532
+static inline void veccpy(vector_t *dst, vector_t *src) {
533
+    *((uint64_t *)dst) = *((uint64_t *)src);
534
+}
535
+
536
+static inline void set_mvs(vector_t *mv, enum block_t size) {
537
+    switch(size) {
538
+    case BLK_16X16:
539
+        veccpy(mv+MV_STRIDE  ,mv);
540
+        veccpy(mv+MV_STRIDE+1,mv);
541
+    case BLK_16X8:
542
+        veccpy(mv          +1,mv);
543
+        break;
544
+    case BLK_8X16:
545
+        veccpy(mv+MV_STRIDE  ,mv);
546
+        break;
547
+    }
548
+}
549
+
550
+static inline void store_mvs(AVSContext *h) {
551
+    veccpy(&h->col_mv[(h->mby*h->mb_width + h->mbx)*4 + 0], &h->mv[MV_FWD_X0]);
552
+    veccpy(&h->col_mv[(h->mby*h->mb_width + h->mbx)*4 + 1], &h->mv[MV_FWD_X1]);
553
+    veccpy(&h->col_mv[(h->mby*h->mb_width + h->mbx)*4 + 2], &h->mv[MV_FWD_X2]);
554
+    veccpy(&h->col_mv[(h->mby*h->mb_width + h->mbx)*4 + 3], &h->mv[MV_FWD_X3]);
555
+}
556
+
557
+static inline void scale_mv(AVSContext *h, int *d_x, int *d_y, vector_t *src, int distp) {
558
+    int den = h->scale_den[src->ref];
559
+
560
+    *d_x = (src->x*distp*den + 256 + (src->x>>31)) >> 9;
561
+    *d_y = (src->y*distp*den + 256 + (src->y>>31)) >> 9;
562
+}
563
+
564
+static inline void mv_pred_median(AVSContext *h, vector_t *mvP, vector_t *mvA, vector_t *mvB, vector_t *mvC) {
565
+    int ax, ay, bx, by, cx, cy;
566
+    int len_ab, len_bc, len_ca, len_mid;
567
+
568
+    /* scale candidates according to their temporal span */
569
+    scale_mv(h, &ax, &ay, mvA, mvP->dist);
570
+    scale_mv(h, &bx, &by, mvB, mvP->dist);
571
+    scale_mv(h, &cx, &cy, mvC, mvP->dist);
572
+    /* find the geometrical median of the three candidates */
573
+    len_ab = abs(ax - bx) + abs(ay - by);
574
+    len_bc = abs(bx - cx) + abs(by - cy);
575
+    len_ca = abs(cx - ax) + abs(cy - ay);
576
+    len_mid = mid_pred(len_ab, len_bc, len_ca);
577
+    if(len_mid == len_ab) {
578
+        mvP->x = cx;
579
+        mvP->y = cy;
580
+    } else if(len_mid == len_bc) {
581
+        mvP->x = ax;
582
+        mvP->y = ay;
583
+    } else {
584
+        mvP->x = bx;
585
+        mvP->y = by;
586
+    }
587
+}
588
+
589
+static inline void mv_pred_direct(AVSContext *h, vector_t *pmv_fw,
590
+                                  vector_t *pmv_bw, vector_t *col_mv) {
591
+    int den = h->direct_den[col_mv->ref];
592
+    int m = col_mv->x >> 31;
593
+
594
+    pmv_fw->dist = h->dist[1];
595
+    pmv_bw->dist = h->dist[0];
596
+    pmv_fw->ref = 1;
597
+    pmv_bw->ref = 0;
598
+    /* scale the co-located motion vector according to its temporal span */
599
+    pmv_fw->x = (((den+(den*col_mv->x*pmv_fw->dist^m)-m-1)>>14)^m)-m;
600
+    pmv_bw->x = m-(((den+(den*col_mv->x*pmv_bw->dist^m)-m-1)>>14)^m);
601
+    m = col_mv->y >> 31;
602
+    pmv_fw->y = (((den+(den*col_mv->y*pmv_fw->dist^m)-m-1)>>14)^m)-m;
603
+    pmv_bw->y = m-(((den+(den*col_mv->y*pmv_bw->dist^m)-m-1)>>14)^m);
604
+}
605
+
606
+static inline void mv_pred_sym(AVSContext *h, vector_t *src, enum block_t size) {
607
+    vector_t *dst = src + MV_BWD_OFFS;
608
+
609
+    /* backward mv is the scaled and negated forward mv */
610
+    dst->x = -((src->x * h->sym_factor + 256) >> 9);
611
+    dst->y = -((src->y * h->sym_factor + 256) >> 9);
612
+    dst->ref = 0;
613
+    dst->dist = h->dist[0];
614
+    set_mvs(dst, size);
615
+}
616
+
617
+static void mv_pred(AVSContext *h, enum mv_loc_t nP, enum mv_loc_t nC,
618
+                    enum mv_pred_t mode, enum block_t size, int ref) {
619
+    vector_t *mvP = &h->mv[nP];
620
+    vector_t *mvA = &h->mv[nP-1];
621
+    vector_t *mvB = &h->mv[nP-4];
622
+    vector_t *mvC = &h->mv[nC];
623
+    int mvAref = mvA->ref;
624
+    int mvBref = mvB->ref;
625
+    int mvCref;
626
+
627
+    mvP->ref = ref;
628
+    mvP->dist = h->dist[mvP->ref];
629
+    if(mvC->ref == NOT_AVAIL)
630
+        mvC = &h->mv[nP-5]; // set to top-left (mvD)
631
+    mvCref = mvC->ref;
632
+    if(mode == MV_PRED_PSKIP) {
633
+        if((mvAref == NOT_AVAIL) || (mvBref == NOT_AVAIL) ||
634
+           ((mvA->x | mvA->y | mvA->ref) == 0)  ||
635
+           ((mvB->x | mvB->y | mvB->ref) == 0) ) {
636
+            mvP->x = mvP->y = 0;
637
+            set_mvs(mvP,size);
638
+            return;
639
+        }
640
+    }
641
+    /* if there is only one suitable candidate, take it */
642
+    if((mvAref >= 0) && (mvBref < 0) && (mvCref < 0)) {
643
+        mvP->x = mvA->x;
644
+        mvP->y = mvA->y;
645
+    } else if((mvAref < 0) && (mvBref >= 0) && (mvCref < 0)) {
646
+        mvP->x = mvB->x;
647
+        mvP->y = mvB->y;
648
+    } else if((mvAref < 0) && (mvBref < 0) && (mvCref >= 0)) {
649
+        mvP->x = mvC->x;
650
+        mvP->y = mvC->y;
651
+    } else {
652
+        switch(mode) {
653
+        case MV_PRED_LEFT:
654
+            if(mvAref == mvP->ref) {
655
+                mvP->x = mvA->x;
656
+                mvP->y = mvA->y;
657
+            } else
658
+                mv_pred_median(h, mvP, mvA, mvB, mvC);
659
+            break;
660
+        case MV_PRED_TOP:
661
+            if(mvBref == mvP->ref) {
662
+                mvP->x = mvB->x;
663
+                mvP->y = mvB->y;
664
+            } else
665
+                mv_pred_median(h, mvP, mvA, mvB, mvC);
666
+            break;
667
+        case MV_PRED_TOPRIGHT:
668
+            if(mvCref == mvP->ref) {
669
+                mvP->x = mvC->x;
670
+                mvP->y = mvC->y;
671
+            } else
672
+                mv_pred_median(h, mvP, mvA, mvB, mvC);
673
+            break;
674
+        default:
675
+            mv_pred_median(h, mvP, mvA, mvB, mvC);
676
+            break;
677
+        }
678
+    }
679
+    if(mode < MV_PRED_PSKIP) {
680
+        mvP->x += get_se_golomb(&h->s.gb);
681
+        mvP->y += get_se_golomb(&h->s.gb);
682
+    }
683
+    set_mvs(mvP,size);
684
+}
685
+
686
+/*****************************************************************************
687
+ *
688
+ * residual data decoding
689
+ *
690
+ ****************************************************************************/
691
+
692
+/* kth-order exponential golomb code */
693
+static inline int get_ue_code(GetBitContext *gb, int order) {
694
+    if(order)
695
+        return (get_ue_golomb(gb) << order) + get_bits(gb,order);
696
+    return get_ue_golomb(gb);
697
+}
698
+
699
+static int decode_residual_block(AVSContext *h, GetBitContext *gb,
700
+                                 const residual_vlc_t *r, int esc_golomb_order,
701
+                                 int qp, uint8_t *dst, int stride) {
702
+    int i,pos = -1;
703
+    int level_code, esc_code, level, run, mask;
704
+    int level_buf[64];
705
+    int run_buf[64];
706
+    int dqm = dequant_mul[qp];
707
+    int dqs = dequant_shift[qp];
708
+    int dqa = 1 << (dqs - 1);
709
+    const uint8_t *scantab = ff_zigzag_direct;
710
+    DCTELEM block[64];
711
+
712
+    memset(block,0,64*sizeof(DCTELEM));
713
+    for(i=0;i<65;i++) {
714
+        level_code = get_ue_code(gb,r->golomb_order);
715
+        if(level_code >= ESCAPE_CODE) {
716
+            run = (level_code - ESCAPE_CODE) >> 1;
717
+            esc_code = get_ue_code(gb,esc_golomb_order);
718
+            level = esc_code + (run > r->max_run ? 1 : r->level_add[run]);
719
+            while(level > r->inc_limit)
720
+                r++;
721
+            mask = -(level_code & 1);
722
+            level = (level^mask) - mask;
723
+        } else {
724
+            if(level_code < 0)
725
+                return -1;
726
+            level = r->rltab[level_code][0];
727
+            if(!level) //end of block signal
728
+                break;
729
+            run   = r->rltab[level_code][1];
730
+            r += r->rltab[level_code][2];
731
+        }
732
+        level_buf[i] = level;
733
+        run_buf[i] = run;
734
+    }
735
+    /* inverse scan and dequantization */
736
+    for(i=i-1;i>=0;i--) {
737
+        pos += 1 + run_buf[i];
738
+        if(pos > 63) {
739
+            av_log(h->s.avctx, AV_LOG_ERROR,
740
+                   "position out of block bounds at pic %d MB(%d,%d)\n",
741
+                   h->picture.poc, h->mbx, h->mby);
742
+            return -1;
743
+        }
744
+        block[scantab[pos]] = (level_buf[i]*dqm + dqa) >> dqs;
745
+    }
746
+    h->s.dsp.cavs_idct8_add(dst,block,stride);
747
+    return 0;
748
+}
749
+
750
+
751
+static inline void decode_residual_chroma(AVSContext *h) {
752
+    if(h->cbp & (1<<4))
753
+        decode_residual_block(h,&h->s.gb,chroma_2dvlc,0, chroma_qp[h->qp],
754
+                              h->cu,h->c_stride);
755
+    if(h->cbp & (1<<5))
756
+        decode_residual_block(h,&h->s.gb,chroma_2dvlc,0, chroma_qp[h->qp],
757
+                              h->cv,h->c_stride);
758
+}
759
+
760
+static inline void decode_residual_inter(AVSContext *h) {
761
+    int block;
762
+
763
+    /* get coded block pattern */
764
+    h->cbp = cbp_tab[get_ue_golomb(&h->s.gb)][1];
765
+    /* get quantizer */
766
+    if(h->cbp && !h->qp_fixed)
767
+        h->qp += get_se_golomb(&h->s.gb);
768
+    for(block=0;block<4;block++)
769
+        if(h->cbp & (1<<block))
770
+            decode_residual_block(h,&h->s.gb,inter_2dvlc,0,h->qp,
771
+                                  h->cy + h->luma_scan[block], h->l_stride);
772
+    decode_residual_chroma(h);
773
+}
774
+
775
+/*****************************************************************************
776
+ *
777
+ * macroblock level
778
+ *
779
+ ****************************************************************************/
780
+
781
+static inline void init_mb(AVSContext *h) {
782
+    int i;
783
+
784
+    /* copy predictors from top line (MB B and C) into cache */
785
+    for(i=0;i<3;i++) {
786
+        veccpy(&h->mv[MV_FWD_B2+i],&h->top_mv[0][h->mbx*2+i]);
787
+        veccpy(&h->mv[MV_BWD_B2+i],&h->top_mv[1][h->mbx*2+i]);
788
+    }
789
+    h->pred_mode_Y[1] = h->top_pred_Y[h->mbx*2+0];
790
+    h->pred_mode_Y[2] = h->top_pred_Y[h->mbx*2+1];
791
+    /* clear top predictors if MB B is not available */
792
+    if(!(h->flags & B_AVAIL)) {
793
+        veccpy(&h->mv[MV_FWD_B2],(vector_t *)&un_mv);
794
+        veccpy(&h->mv[MV_FWD_B3],(vector_t *)&un_mv);
795
+        veccpy(&h->mv[MV_BWD_B2],(vector_t *)&un_mv);
796
+        veccpy(&h->mv[MV_BWD_B3],(vector_t *)&un_mv);
797
+        h->pred_mode_Y[1] = h->pred_mode_Y[2] = NOT_AVAIL;
798
+        h->flags &= ~(C_AVAIL|D_AVAIL);
799
+    } else if(h->mbx) {
800
+        h->flags |= D_AVAIL;
801
+    }
802
+    if(h->mbx == h->mb_width-1) //MB C not available
803
+        h->flags &= ~C_AVAIL;
804
+    /* clear top-right predictors if MB C is not available */
805
+    if(!(h->flags & C_AVAIL)) {
806
+        veccpy(&h->mv[MV_FWD_C2],(vector_t *)&un_mv);
807
+        veccpy(&h->mv[MV_BWD_C2],(vector_t *)&un_mv);
808
+    }
809
+    /* clear top-left predictors if MB D is not available */
810
+    if(!(h->flags & D_AVAIL)) {
811
+        veccpy(&h->mv[MV_FWD_D3],(vector_t *)&un_mv);
812
+        veccpy(&h->mv[MV_BWD_D3],(vector_t *)&un_mv);
813
+    }
814
+    /* set pointer for co-located macroblock type */
815
+    h->col_type = &h->col_type_base[h->mby*h->mb_width + h->mbx];
816
+}
817
+
818
+static inline void check_for_slice(AVSContext *h);
819
+
820
+static inline int next_mb(AVSContext *h) {
821
+    int i;
822
+
823
+    h->flags |= A_AVAIL;
824
+    h->cy += 16;
825
+    h->cu += 8;
826
+    h->cv += 8;
827
+    /* copy mvs as predictors to the left */
828
+    for(i=0;i<=20;i+=4)
829
+        veccpy(&h->mv[i],&h->mv[i+2]);
830
+    /* copy bottom mvs from cache to top line */
831
+    veccpy(&h->top_mv[0][h->mbx*2+0],&h->mv[MV_FWD_X2]);
832
+    veccpy(&h->top_mv[0][h->mbx*2+1],&h->mv[MV_FWD_X3]);
833
+    veccpy(&h->top_mv[1][h->mbx*2+0],&h->mv[MV_BWD_X2]);
834
+    veccpy(&h->top_mv[1][h->mbx*2+1],&h->mv[MV_BWD_X3]);
835
+    /* next MB address */
836
+    h->mbx++;
837
+    if(h->mbx == h->mb_width) { //new mb line
838
+        h->flags = B_AVAIL|C_AVAIL;
839
+        /* clear left pred_modes */
840
+        h->pred_mode_Y[3] = h->pred_mode_Y[6] = NOT_AVAIL;
841
+        /* clear left mv predictors */
842
+        for(i=0;i<=20;i+=4)
843
+            veccpy(&h->mv[i],(vector_t *)&un_mv);
844
+        h->mbx = 0;
845
+        h->mby++;
846
+        /* re-calculate sample pointers */
847
+        h->cy = h->picture.data[0] + h->mby*16*h->l_stride;
848
+        h->cu = h->picture.data[1] + h->mby*8*h->c_stride;
849
+        h->cv = h->picture.data[2] + h->mby*8*h->c_stride;
850
+        if(h->mby == h->mb_height) { //frame end
851
+            return 0;
852
+        } else {
853
+            //check_for_slice(h);
854
+        }
855
+    }
856
+    return 1;
857
+}
858
+
859
+static void decode_mb_i(AVSContext *h, int is_i_pic) {
860
+    GetBitContext *gb = &h->s.gb;
861
+    int block, pred_mode_uv;
862
+    uint8_t top[18];
863
+    uint8_t left[18];
864
+    uint8_t *d;
865
+
866
+    /* get intra prediction modes from stream */
867
+    for(block=0;block<4;block++) {
868
+        int nA,nB,predpred;
869
+        int pos = scan3x3[block];
870
+
871
+        nA = h->pred_mode_Y[pos-1];
872
+        nB = h->pred_mode_Y[pos-3];
873
+        if((nA == NOT_AVAIL) || (nB == NOT_AVAIL))
874
+            predpred = 2;
875
+        else
876
+            predpred = FFMIN(nA,nB);
877
+        if(get_bits1(gb))
878
+            h->pred_mode_Y[pos] = predpred;
879
+        else {
880
+            h->pred_mode_Y[pos] = get_bits(gb,2);
881
+            if(h->pred_mode_Y[pos] >= predpred)
882
+                h->pred_mode_Y[pos]++;
883
+        }
884
+    }
885
+    pred_mode_uv = get_ue_golomb(gb);
886
+    if(pred_mode_uv > 6) {
887
+        av_log(h->s.avctx, AV_LOG_ERROR, "illegal intra chroma pred mode\n");
888
+        pred_mode_uv = 0;
889
+    }
890
+
891
+    /* save pred modes before they get modified */
892
+    h->pred_mode_Y[3] =  h->pred_mode_Y[5];
893
+    h->pred_mode_Y[6] =  h->pred_mode_Y[8];
894
+    h->top_pred_Y[h->mbx*2+0] = h->pred_mode_Y[7];
895
+    h->top_pred_Y[h->mbx*2+1] = h->pred_mode_Y[8];
896
+
897
+    /* modify pred modes according to availability of neighbour samples */
898
+    if(!(h->flags & A_AVAIL)) {
899
+        modify_pred(left_modifier_l, &h->pred_mode_Y[4] );
900
+        modify_pred(left_modifier_l, &h->pred_mode_Y[7] );
901
+        modify_pred(left_modifier_c, &pred_mode_uv );
902
+    }
903
+    if(!(h->flags & B_AVAIL)) {
904
+        modify_pred(top_modifier_l, &h->pred_mode_Y[4] );
905
+        modify_pred(top_modifier_l, &h->pred_mode_Y[5] );
906
+        modify_pred(top_modifier_c, &pred_mode_uv );
907
+    }
908
+
909
+    /* get coded block pattern */
910
+    if(is_i_pic)
911
+        h->cbp = cbp_tab[get_ue_golomb(gb)][0];
912
+    if(h->cbp && !h->qp_fixed)
913
+        h->qp += get_se_golomb(gb); //qp_delta
914
+
915
+    /* luma intra prediction interleaved with residual decode/transform/add */
916
+    for(block=0;block<4;block++) {
917
+        d = h->cy + h->luma_scan[block];
918
+        load_intra_pred_luma(h, top, left, block);
919
+        h->intra_pred_l[(int)h->pred_mode_Y[scan3x3[block]]]
920
+            (d, top, left, h->l_stride);
921
+        if(h->cbp & (1<<block))
922
+            decode_residual_block(h,gb,intra_2dvlc,1,h->qp,d,h->l_stride);
923
+    }
924
+
925
+    /* chroma intra prediction */
926
+    load_intra_pred_chroma(&h->top_border_u[h->mbx*8], h->left_border_u,
927
+                           h->topleft_border_u, top, left, h->c_stride, h->flags);
928
+    h->intra_pred_c[pred_mode_uv](h->cu, top, left, h->c_stride);
929
+    load_intra_pred_chroma(&h->top_border_v[h->mbx*8], h->left_border_v,
930
+                           h->topleft_border_v, top, left, h->c_stride, h->flags);
931
+    h->intra_pred_c[pred_mode_uv](h->cv, top, left, h->c_stride);
932
+
933
+    decode_residual_chroma(h);
934
+    filter_mb(h,I_8X8);
935
+
936
+    /* mark motion vectors as intra */
937
+    veccpy( &h->mv[MV_FWD_X0], (vector_t *)&intra_mv);
938
+    set_mvs(&h->mv[MV_FWD_X0], BLK_16X16);
939
+    veccpy( &h->mv[MV_BWD_X0], (vector_t *)&intra_mv);
940
+    set_mvs(&h->mv[MV_BWD_X0], BLK_16X16);
941
+    if(h->pic_type != FF_B_TYPE)
942
+        *h->col_type = I_8X8;
943
+}
944
+
945
+static void mb_skip_p(AVSContext *h) {
946
+    mv_pred(h, MV_FWD_X0, MV_FWD_C2, MV_PRED_PSKIP, BLK_16X16, 0);
947
+    inter_pred(h);
948
+    store_mvs(h);
949
+    filter_mb(h,P_SKIP);
950
+    *h->col_type = P_SKIP;
951
+}
952
+
953
+
954
+static void mb_skip_b(AVSContext *h) {
955
+    int i;
956
+
957
+    if(!(*h->col_type)) {
958
+        /* intra MB at co-location, do in-plane prediction */
959
+        mv_pred(h, MV_FWD_X0, MV_FWD_C2, MV_PRED_BSKIP, BLK_16X16, 1);
960
+        mv_pred(h, MV_BWD_X0, MV_BWD_C2, MV_PRED_BSKIP, BLK_16X16, 0);
961
+    } else {
962
+        /* direct prediction from co-located P MB, block-wise */
963
+        for(i=0;i<4;i++)
964
+            mv_pred_direct(h,&h->mv[mv_scan[i]],
965
+                           &h->mv[mv_scan[i]+MV_BWD_OFFS],
966
+                           &h->col_mv[(h->mby*h->mb_width + h->mbx)*4 + i]);
967
+    }
968
+}
969
+
970
+static void decode_mb_p(AVSContext *h, enum mb_t mb_type) {
971
+    GetBitContext *gb = &h->s.gb;
972
+    int ref[4];
973
+
974
+    switch(mb_type) {
975
+    case P_SKIP:
976
+        mb_skip_p(h);
977
+        return;
978
+    case P_16X16:
979
+        ref[0] = h->ref_flag ? 0 : get_bits1(gb);
980
+        mv_pred(h, MV_FWD_X0, MV_FWD_C2, MV_PRED_MEDIAN,   BLK_16X16,ref[0]);
981
+        break;
982
+    case P_16X8:
983
+        ref[0] = h->ref_flag ? 0 : get_bits1(gb);
984
+        ref[2] = h->ref_flag ? 0 : get_bits1(gb);
985
+        mv_pred(h, MV_FWD_X0, MV_FWD_C2, MV_PRED_TOP,      BLK_16X8, ref[0]);
986
+        mv_pred(h, MV_FWD_X2, MV_FWD_A1, MV_PRED_LEFT,     BLK_16X8, ref[2]);
987
+        break;
988
+    case P_8X16:
989
+        ref[0] = h->ref_flag ? 0 : get_bits1(gb);
990
+        ref[1] = h->ref_flag ? 0 : get_bits1(gb);
991
+        mv_pred(h, MV_FWD_X0, MV_FWD_B3, MV_PRED_LEFT,     BLK_8X16, ref[0]);
992
+        mv_pred(h, MV_FWD_X1, MV_FWD_C2, MV_PRED_TOPRIGHT, BLK_8X16, ref[1]);
993
+        break;
994
+    case P_8X8:
995
+        ref[0] = h->ref_flag ? 0 : get_bits1(gb);
996
+        ref[1] = h->ref_flag ? 0 : get_bits1(gb);
997
+        ref[2] = h->ref_flag ? 0 : get_bits1(gb);
998
+        ref[3] = h->ref_flag ? 0 : get_bits1(gb);
999
+        mv_pred(h, MV_FWD_X0, MV_FWD_B3, MV_PRED_MEDIAN,   BLK_8X8, ref[0]);
1000
+        mv_pred(h, MV_FWD_X1, MV_FWD_C2, MV_PRED_MEDIAN,   BLK_8X8, ref[1]);
1001
+        mv_pred(h, MV_FWD_X2, MV_FWD_X1, MV_PRED_MEDIAN,   BLK_8X8, ref[2]);
1002
+        mv_pred(h, MV_FWD_X3, MV_FWD_X0, MV_PRED_MEDIAN,   BLK_8X8, ref[3]);
1003
+    }
1004
+    inter_pred(h);
1005
+    store_mvs(h);
1006
+    decode_residual_inter(h);
1007
+    filter_mb(h,mb_type);
1008
+    *h->col_type = mb_type;
1009
+}
1010
+
1011
+static void decode_mb_b(AVSContext *h, enum mb_t mb_type) {
1012
+    int block;
1013
+    enum sub_mb_t sub_type[4];
1014
+    int flags;
1015
+
1016
+    /* reset all MVs */
1017
+    veccpy( &h->mv[MV_FWD_X0], (vector_t *)&dir_mv);
1018
+    set_mvs(&h->mv[MV_FWD_X0], BLK_16X16);
1019
+    veccpy( &h->mv[MV_BWD_X0], (vector_t *)&dir_mv);
1020
+    set_mvs(&h->mv[MV_BWD_X0], BLK_16X16);
1021
+    switch(mb_type) {
1022
+    case B_SKIP:
1023
+        mb_skip_b(h);
1024
+        inter_pred(h);
1025
+        filter_mb(h,B_SKIP);
1026
+        return;
1027
+    case B_DIRECT:
1028
+        mb_skip_b(h);
1029
+        break;
1030
+    case B_FWD_16X16:
1031
+        mv_pred(h, MV_FWD_X0, MV_FWD_C2, MV_PRED_MEDIAN, BLK_16X16, 1);
1032
+        break;
1033
+    case B_SYM_16X16:
1034
+        mv_pred(h, MV_FWD_X0, MV_FWD_C2, MV_PRED_MEDIAN, BLK_16X16, 1);
1035
+        mv_pred_sym(h, &h->mv[MV_FWD_X0], BLK_16X16);
1036
+        break;
1037
+    case B_BWD_16X16:
1038
+        mv_pred(h, MV_BWD_X0, MV_BWD_C2, MV_PRED_MEDIAN, BLK_16X16, 0);
1039
+        break;
1040
+    case B_8X8:
1041
+        for(block=0;block<4;block++)
1042
+            sub_type[block] = get_bits(&h->s.gb,2);
1043
+        for(block=0;block<4;block++) {
1044
+            switch(sub_type[block]) {
1045
+            case B_SUB_DIRECT:
1046
+                if(!(*h->col_type)) {
1047
+                    /* intra MB at co-location, do in-plane prediction */
1048
+                    mv_pred(h, mv_scan[block], mv_scan[block]-3,
1049
+                            MV_PRED_BSKIP, BLK_8X8, 1);
1050
+                    mv_pred(h, mv_scan[block]+MV_BWD_OFFS,
1051
+                            mv_scan[block]-3+MV_BWD_OFFS,
1052
+                            MV_PRED_BSKIP, BLK_8X8, 0);
1053
+                } else
1054
+                    mv_pred_direct(h,&h->mv[mv_scan[block]],
1055
+                                   &h->mv[mv_scan[block]+MV_BWD_OFFS],
1056
+                                   &h->col_mv[(h->mby*h->mb_width + h->mbx)*4 + block]);
1057
+                break;
1058
+            case B_SUB_FWD:
1059
+                mv_pred(h, mv_scan[block], mv_scan[block]-3,
1060
+                        MV_PRED_MEDIAN, BLK_8X8, 1);
1061
+                break;
1062
+            case B_SUB_SYM:
1063
+                mv_pred(h, mv_scan[block], mv_scan[block]-3,
1064
+                        MV_PRED_MEDIAN, BLK_8X8, 1);
1065
+                mv_pred_sym(h, &h->mv[mv_scan[block]], BLK_8X8);
1066
+                break;
1067
+            }
1068
+        }
1069
+        for(block=0;block<4;block++) {
1070
+            if(sub_type[block] == B_SUB_BWD)
1071
+                mv_pred(h, mv_scan[block]+MV_BWD_OFFS,
1072
+                        mv_scan[block]+MV_BWD_OFFS-3,
1073
+                        MV_PRED_MEDIAN, BLK_8X8, 0);
1074
+        }
1075
+        break;
1076
+    default:
1077
+        assert((mb_type > B_SYM_16X16) && (mb_type < B_8X8));
1078
+        flags = b_partition_flags[(mb_type-1)>>1];
1079
+        if(mb_type & 1) { /* 16x8 macroblock types */
1080
+            if(flags & FWD0)
1081
+                mv_pred(h, MV_FWD_X0, MV_FWD_C2, MV_PRED_TOP,  BLK_16X8, 1);
1082
+            if(flags & SYM0) {
1083
+                mv_pred(h, MV_FWD_X0, MV_FWD_C2, MV_PRED_TOP,  BLK_16X8, 1);
1084
+                mv_pred_sym(h, &h->mv[MV_FWD_X0], BLK_16X8);
1085
+            }
1086
+            if(flags & FWD1)
1087
+                mv_pred(h, MV_FWD_X2, MV_FWD_A1, MV_PRED_LEFT, BLK_16X8, 1);
1088
+            if(flags & SYM1) {
1089
+                mv_pred(h, MV_FWD_X2, MV_FWD_A1, MV_PRED_LEFT, BLK_16X8, 1);
1090
+                mv_pred_sym(h, &h->mv[9], BLK_16X8);
1091
+            }
1092
+            if(flags & BWD0)
1093
+                mv_pred(h, MV_BWD_X0, MV_BWD_C2, MV_PRED_TOP,  BLK_16X8, 0);
1094
+            if(flags & BWD1)
1095
+                mv_pred(h, MV_BWD_X2, MV_BWD_A1, MV_PRED_LEFT, BLK_16X8, 0);
1096
+        } else {          /* 8x16 macroblock types */
1097
+            if(flags & FWD0)
1098
+                mv_pred(h, MV_FWD_X0, MV_FWD_B3, MV_PRED_LEFT, BLK_8X16, 1);
1099
+            if(flags & SYM0) {
1100
+                mv_pred(h, MV_FWD_X0, MV_FWD_B3, MV_PRED_LEFT, BLK_8X16, 1);
1101
+                mv_pred_sym(h, &h->mv[MV_FWD_X0], BLK_8X16);
1102
+            }
1103
+            if(flags & FWD1)
1104
+                mv_pred(h, MV_FWD_X1, MV_FWD_C2, MV_PRED_TOPRIGHT,BLK_8X16, 1);
1105
+            if(flags & SYM1) {
1106
+                mv_pred(h, MV_FWD_X1, MV_FWD_C2, MV_PRED_TOPRIGHT,BLK_8X16, 1);
1107
+                mv_pred_sym(h, &h->mv[6], BLK_8X16);
1108
+            }
1109
+            if(flags & BWD0)
1110
+                mv_pred(h, MV_BWD_X0, MV_BWD_B3, MV_PRED_LEFT, BLK_8X16, 0);
1111
+            if(flags & BWD1)
1112
+                mv_pred(h, MV_BWD_X1, MV_BWD_C2, MV_PRED_TOPRIGHT,BLK_8X16, 0);
1113
+        }
1114
+    }
1115
+    inter_pred(h);
1116
+    decode_residual_inter(h);
1117
+    filter_mb(h,mb_type);
1118
+}
1119
+
1120
+/*****************************************************************************
1121
+ *
1122
+ * slice level
1123
+ *
1124
+ ****************************************************************************/
1125
+
1126
+static inline int decode_slice_header(AVSContext *h, GetBitContext *gb) {
1127
+    if(h->stc > 0xAF)
1128
+        av_log(h->s.avctx, AV_LOG_ERROR, "unexpected start code 0x%02x\n", h->stc);
1129
+    h->mby = h->stc;
1130
+    if((h->mby == 0) && (!h->qp_fixed)){
1131
+        h->qp_fixed = get_bits1(gb);
1132
+        h->qp = get_bits(gb,6);
1133
+    }
1134
+    /* inter frame or second slice can have weighting params */
1135
+    if((h->pic_type != FF_I_TYPE) || (!h->pic_structure && h->mby >= h->mb_width/2))
1136
+        if(get_bits1(gb)) { //slice_weighting_flag
1137
+            av_log(h->s.avctx, AV_LOG_ERROR,
1138
+                   "weighted prediction not yet supported\n");
1139
+        }
1140
+    return 0;
1141
+}
1142
+
1143
+static inline void check_for_slice(AVSContext *h) {
1144
+    GetBitContext *gb = &h->s.gb;
1145
+    int align;
1146
+    align = (-get_bits_count(gb)) & 7;
1147
+    if((show_bits_long(gb,24+align) & 0xFFFFFF) == 0x000001) {
1148
+        get_bits_long(gb,24+align);
1149
+        h->stc = get_bits(gb,8);
1150
+        decode_slice_header(h,gb);
1151
+    }
1152
+}
1153
+
1154
+/*****************************************************************************
1155
+ *
1156
+ * frame level
1157
+ *
1158
+ ****************************************************************************/
1159
+
1160
+static void init_pic(AVSContext *h) {
1161
+    int i;
1162
+
1163
+    /* clear some predictors */
1164
+    for(i=0;i<=20;i+=4)
1165
+        veccpy(&h->mv[i],(vector_t *)&un_mv);
1166
+    veccpy(&h->mv[MV_BWD_X0], (vector_t *)&dir_mv);
1167
+    set_mvs(&h->mv[MV_BWD_X0], BLK_16X16);
1168
+    veccpy(&h->mv[MV_FWD_X0], (vector_t *)&dir_mv);
1169
+    set_mvs(&h->mv[MV_FWD_X0], BLK_16X16);
1170
+    h->pred_mode_Y[3] = h->pred_mode_Y[6] = NOT_AVAIL;
1171
+    h->cy = h->picture.data[0];
1172
+    h->cu = h->picture.data[1];
1173
+    h->cv = h->picture.data[2];
1174
+    h->l_stride = h->picture.linesize[0];
1175
+    h->c_stride = h->picture.linesize[1];
1176
+    h->luma_scan[2] = 8*h->l_stride;
1177
+    h->luma_scan[3] = 8*h->l_stride+8;
1178
+    h->mbx = h->mby = 0;
1179
+    h->flags = 0;
1180
+}
1181
+
1182
+static int decode_pic(AVSContext *h) {
1183
+    MpegEncContext *s = &h->s;
1184
+    int i,skip_count;
1185
+    enum mb_t mb_type;
1186
+
1187
+    if (!s->context_initialized) {
1188
+        if (MPV_common_init(s) < 0)
1189
+            return -1;
1190
+    }
1191
+    get_bits(&s->gb,16);//bbv_dwlay
1192
+    if(h->stc == PIC_PB_START_CODE) {
1193
+        h->pic_type = get_bits(&s->gb,2) + FF_I_TYPE;
1194
+        /* make sure we have the reference frames we need */
1195
+        if(!h->DPB[0].data[0] ||
1196
+          (!h->DPB[1].data[0] && h->pic_type == FF_B_TYPE))
1197
+            return -1;
1198
+    } else {
1199
+        h->pic_type = FF_I_TYPE;
1200
+        if(get_bits1(&s->gb))
1201
+            get_bits(&s->gb,16);//time_code
1202
+    }
1203
+    /* release last B frame */
1204
+    if(h->picture.data[0])
1205
+        s->avctx->release_buffer(s->avctx, (AVFrame *)&h->picture);
1206
+
1207
+    s->avctx->get_buffer(s->avctx, (AVFrame *)&h->picture);
1208
+    init_pic(h);
1209
+    h->picture.poc = get_bits(&s->gb,8)*2;
1210
+
1211
+    /* get temporal distances and MV scaling factors */
1212
+    if(h->pic_type != FF_B_TYPE) {
1213
+        h->dist[0] = (h->picture.poc - h->DPB[0].poc  + 512) % 512;
1214
+    } else {
1215
+        h->dist[0] = (h->DPB[0].poc  - h->picture.poc + 512) % 512;
1216
+    }
1217
+    h->dist[1] = (h->picture.poc - h->DPB[1].poc  + 512) % 512;
1218
+    h->scale_den[0] = h->dist[0] ? 512/h->dist[0] : 0;
1219
+    h->scale_den[1] = h->dist[1] ? 512/h->dist[1] : 0;
1220
+    if(h->pic_type == FF_B_TYPE) {
1221
+        h->sym_factor = h->dist[0]*h->scale_den[1];
1222
+    } else {
1223
+        h->direct_den[0] = h->dist[0] ? 16384/h->dist[0] : 0;
1224
+        h->direct_den[1] = h->dist[1] ? 16384/h->dist[1] : 0;
1225
+    }
1226
+
1227
+    if(s->low_delay)
1228
+        get_ue_golomb(&s->gb); //bbv_check_times
1229
+    h->progressive             = get_bits1(&s->gb);
1230
+    if(h->progressive)
1231
+        h->pic_structure = 1;
1232
+    else if(!(h->pic_structure = get_bits1(&s->gb) && (h->stc == PIC_PB_START_CODE)) )
1233
+        get_bits1(&s->gb);     //advanced_pred_mode_disable
1234
+    skip_bits1(&s->gb);        //top_field_first
1235
+    skip_bits1(&s->gb);        //repeat_first_field
1236
+    h->qp_fixed                = get_bits1(&s->gb);
1237
+    h->qp                      = get_bits(&s->gb,6);
1238
+    if(h->pic_type == FF_I_TYPE) {
1239
+        if(!h->progressive && !h->pic_structure)
1240
+            skip_bits1(&s->gb);//what is this?
1241
+        skip_bits(&s->gb,4);   //reserved bits
1242
+    } else {
1243
+        if(!(h->pic_type == FF_B_TYPE && h->pic_structure == 1))
1244
+            h->ref_flag        = get_bits1(&s->gb);
1245
+        skip_bits(&s->gb,4);   //reserved bits
1246
+        h->skip_mode_flag      = get_bits1(&s->gb);
1247
+    }
1248
+    h->loop_filter_disable     = get_bits1(&s->gb);
1249
+    if(!h->loop_filter_disable && get_bits1(&s->gb)) {
1250
+        h->alpha_offset        = get_se_golomb(&s->gb);
1251
+        h->beta_offset         = get_se_golomb(&s->gb);
1252
+    } else {
1253
+        h->alpha_offset = h->beta_offset  = 0;
1254
+    }
1255
+    check_for_slice(h);
1256
+    if(h->pic_type == FF_I_TYPE) {
1257
+        do {
1258
+            init_mb(h);
1259
+            decode_mb_i(h,1);
1260
+        } while(next_mb(h));
1261
+    } else if(h->pic_type == FF_P_TYPE) {
1262
+        do {
1263
+            if(h->skip_mode_flag) {
1264
+                skip_count = get_ue_golomb(&s->gb);
1265
+                for(i=0;i<skip_count;i++) {
1266
+                    init_mb(h);
1267
+                    mb_skip_p(h);
1268
+                    if(!next_mb(h))
1269
+                        goto done;
1270
+                }
1271
+                mb_type = get_ue_golomb(&s->gb) + P_16X16;
1272
+            } else {
1273
+                mb_type = get_ue_golomb(&s->gb) + P_SKIP;
1274
+            }
1275
+            init_mb(h);
1276
+            if(mb_type > P_8X8) {
1277
+                h->cbp = cbp_tab[mb_type - P_8X8 - 1][0];
1278
+                decode_mb_i(h,0);
1279
+            } else {
1280
+                decode_mb_p(h,mb_type);
1281
+            }
1282
+        } while(next_mb(h));
1283
+    } else { //FF_B_TYPE
1284
+        do {
1285
+            if(h->skip_mode_flag) {
1286
+                skip_count = get_ue_golomb(&s->gb);
1287
+                for(i=0;i<skip_count;i++) {
1288
+                    init_mb(h);
1289
+                    mb_skip_b(h);
1290
+                    inter_pred(h);
1291
+                    filter_mb(h,B_SKIP);
1292
+                    if(!next_mb(h))
1293
+                        goto done;
1294
+                }
1295
+                mb_type = get_ue_golomb(&s->gb) + B_DIRECT;
1296
+            } else {
1297
+                mb_type = get_ue_golomb(&s->gb) + B_SKIP;
1298
+            }
1299
+            init_mb(h);
1300
+            if(mb_type > B_8X8) {
1301
+                h->cbp = cbp_tab[mb_type - B_8X8 - 1][0];
1302
+                decode_mb_i(h,0);
1303
+            } else {
1304
+                decode_mb_b(h,mb_type);
1305
+            }
1306
+        } while(next_mb(h));
1307
+    }
1308
+ done:
1309
+    if(h->pic_type != FF_B_TYPE) {
1310
+        if(h->DPB[1].data[0])
1311
+            s->avctx->release_buffer(s->avctx, (AVFrame *)&h->DPB[1]);
1312
+        memcpy(&h->DPB[1], &h->DPB[0], sizeof(Picture));
1313
+        memcpy(&h->DPB[0], &h->picture, sizeof(Picture));
1314
+        memset(&h->picture,0,sizeof(Picture));
1315
+    }
1316
+    return 0;
1317
+}
1318
+
1319
+/*****************************************************************************
1320
+ *
1321
+ * headers and interface
1322
+ *
1323
+ ****************************************************************************/
1324
+
1325
+static void init_top_lines(AVSContext *h) {
1326
+    /* alloc top line of predictors */
1327
+    h->top_qp       = av_malloc( h->mb_width);
1328
+    h->top_mv[0]    = av_malloc((h->mb_width*2+1)*sizeof(vector_t));
1329
+    h->top_mv[1]    = av_malloc((h->mb_width*2+1)*sizeof(vector_t));
1330
+    h->top_pred_Y   = av_malloc( h->mb_width*2*sizeof(int));
1331
+    h->top_border_y = av_malloc((h->mb_width+1)*16);
1332
+    h->top_border_u = av_malloc((h->mb_width+1)*8);
1333
+    h->top_border_v = av_malloc((h->mb_width+1)*8);
1334
+
1335
+    /* alloc space for co-located MVs and types */
1336
+    h->col_mv       = av_malloc( h->mb_width*h->mb_height*4*sizeof(vector_t));
1337
+    h->col_type_base = av_malloc(h->mb_width*h->mb_height);
1338
+}
1339
+
1340
+static int decode_seq_header(AVSContext *h) {
1341
+    MpegEncContext *s = &h->s;
1342
+    extern const AVRational frame_rate_tab[];
1343
+    int frame_rate_code;
1344
+
1345
+    h->profile =         get_bits(&s->gb,8);
1346
+    h->level =           get_bits(&s->gb,8);
1347
+    skip_bits1(&s->gb); //progressive sequence
1348
+    s->width =           get_bits(&s->gb,14);
1349
+    s->height =          get_bits(&s->gb,14);
1350
+    skip_bits(&s->gb,2); //chroma format
1351
+    skip_bits(&s->gb,3); //sample_precision
1352
+    h->aspect_ratio =    get_bits(&s->gb,4);
1353
+    frame_rate_code =    get_bits(&s->gb,4);
1354
+    skip_bits(&s->gb,18);//bit_rate_lower
1355
+    skip_bits1(&s->gb);  //marker_bit
1356
+    skip_bits(&s->gb,12);//bit_rate_upper
1357
+    s->low_delay =       get_bits1(&s->gb);
1358
+    h->mb_width  = (s->width  + 15) >> 4;
1359
+    h->mb_height = (s->height + 15) >> 4;
1360
+    h->s.avctx->time_base.den = frame_rate_tab[frame_rate_code].num;
1361
+    h->s.avctx->time_base.num = frame_rate_tab[frame_rate_code].den;
1362
+    h->s.avctx->width  = s->width;
1363
+    h->s.avctx->height = s->height;
1364
+    if(!h->top_qp)
1365
+        init_top_lines(h);
1366
+    return 0;
1367
+}
1368
+
1369
+/**
1370
+ * finds the end of the current frame in the bitstream.
1371
+ * @return the position of the first byte of the next frame, or -1
1372
+ */
1373
+int ff_cavs_find_frame_end(ParseContext *pc, const uint8_t *buf, int buf_size) {
1374
+    int pic_found, i;
1375
+    uint32_t state;
1376
+
1377
+    pic_found= pc->frame_start_found;
1378
+    state= pc->state;
1379
+
1380
+    i=0;
1381
+    if(!pic_found){
1382
+        for(i=0; i<buf_size; i++){
1383
+            state= (state<<8) | buf[i];
1384
+            if(state == PIC_I_START_CODE || state == PIC_PB_START_CODE){
1385
+                i++;
1386
+                pic_found=1;
1387
+                break;
1388
+            }
1389
+        }
1390
+    }
1391
+
1392
+    if(pic_found){
1393
+        /* EOF considered as end of frame */
1394
+        if (buf_size == 0)
1395
+            return 0;
1396
+        for(; i<buf_size; i++){
1397
+            state= (state<<8) | buf[i];
1398
+            if((state&0xFFFFFF00) == 0x100){
1399
+                if(state < SLICE_MIN_START_CODE || state > SLICE_MAX_START_CODE){
1400
+                    pc->frame_start_found=0;
1401
+                    pc->state=-1;
1402
+                    return i-3;
1403
+                }
1404
+            }
1405
+        }
1406
+    }
1407
+    pc->frame_start_found= pic_found;
1408
+    pc->state= state;
1409
+    return END_NOT_FOUND;
1410
+}
1411
+
1412
+void ff_cavs_flush(AVCodecContext * avctx) {
1413
+    AVSContext *h = (AVSContext *)avctx->priv_data;
1414
+    h->got_keyframe = 0;
1415
+}
1416
+
1417
+static int cavs_decode_frame(AVCodecContext * avctx,void *data, int *data_size,
1418
+                             uint8_t * buf, int buf_size) {
1419
+    AVSContext *h = avctx->priv_data;
1420
+    MpegEncContext *s = &h->s;
1421
+    int input_size;
1422
+    const uint8_t *buf_end;
1423
+    const uint8_t *buf_ptr;
1424
+    AVFrame *picture = data;
1425
+    uint32_t stc;
1426
+
1427
+    s->avctx = avctx;
1428
+
1429
+    if (buf_size == 0) {
1430
+        if(!s->low_delay && h->DPB[0].data[0]) {
1431
+            *data_size = sizeof(AVPicture);
1432
+            *picture = *(AVFrame *) &h->DPB[0];
1433
+        }
1434
+        return 0;
1435
+    }
1436
+
1437
+    buf_ptr = buf;
1438
+    buf_end = buf + buf_size;
1439
+    for(;;) {
1440
+        buf_ptr = ff_find_start_code(buf_ptr,buf_end, &stc);
1441
+        if(stc & 0xFFFFFE00)
1442
+            return FFMAX(0, buf_ptr - buf - s->parse_context.last_index);
1443
+        input_size = (buf_end - buf_ptr)*8;
1444
+        switch(stc) {
1445
+        case SEQ_START_CODE:
1446
+            init_get_bits(&s->gb, buf_ptr, input_size);
1447
+            decode_seq_header(h);
1448
+            break;
1449
+        case PIC_I_START_CODE:
1450
+            if(!h->got_keyframe) {
1451
+                if(h->DPB[0].data[0])
1452
+                    avctx->release_buffer(avctx, (AVFrame *)&h->DPB[0]);
1453
+                if(h->DPB[1].data[0])
1454
+                    avctx->release_buffer(avctx, (AVFrame *)&h->DPB[1]);
1455
+                h->got_keyframe = 1;
1456
+            }
1457
+        case PIC_PB_START_CODE:
1458
+            *data_size = 0;
1459
+            if(!h->got_keyframe)
1460
+                break;
1461
+            init_get_bits(&s->gb, buf_ptr, input_size);
1462
+            h->stc = stc;
1463
+            if(decode_pic(h))
1464
+                break;
1465
+            *data_size = sizeof(AVPicture);
1466
+            if(h->pic_type != FF_B_TYPE) {
1467
+                if(h->DPB[1].data[0]) {
1468
+                    *picture = *(AVFrame *) &h->DPB[1];
1469
+                } else {
1470
+                    *data_size = 0;
1471
+                }
1472
+            } else
1473
+                *picture = *(AVFrame *) &h->picture;
1474
+            break;
1475
+        case EXT_START_CODE:
1476
+            //mpeg_decode_extension(avctx,buf_ptr, input_size);
1477
+            break;
1478
+        case USER_START_CODE:
1479
+            //mpeg_decode_user_data(avctx,buf_ptr, input_size);
1480
+            break;
1481
+        default:
1482
+            if (stc >= SLICE_MIN_START_CODE &&
1483
+                stc <= SLICE_MAX_START_CODE) {
1484
+                init_get_bits(&s->gb, buf_ptr, input_size);
1485
+                decode_slice_header(h, &s->gb);
1486
+            }
1487
+            break;
1488
+        }
1489
+    }
1490
+}
1491
+
1492
+static int cavs_decode_init(AVCodecContext * avctx) {
1493
+    AVSContext *h = (AVSContext *)avctx->priv_data;
1494
+    MpegEncContext * const s = &h->s;
1495
+
1496
+    MPV_decode_defaults(s);
1497
+    s->avctx = avctx;
1498
+
1499
+    avctx->pix_fmt= PIX_FMT_YUV420P;
1500
+
1501
+    h->luma_scan[0] = 0;
1502
+    h->luma_scan[1] = 8;
1503
+    h->intra_pred_l[      INTRA_L_VERT] = intra_pred_vert;
1504
+    h->intra_pred_l[     INTRA_L_HORIZ] = intra_pred_horiz;
1505
+    h->intra_pred_l[        INTRA_L_LP] = intra_pred_lp;
1506
+    h->intra_pred_l[ INTRA_L_DOWN_LEFT] = intra_pred_down_left;
1507
+    h->intra_pred_l[INTRA_L_DOWN_RIGHT] = intra_pred_down_right;
1508
+    h->intra_pred_l[   INTRA_L_LP_LEFT] = intra_pred_lp_left;
1509
+    h->intra_pred_l[    INTRA_L_LP_TOP] = intra_pred_lp_top;
1510
+    h->intra_pred_l[    INTRA_L_DC_128] = intra_pred_dc_128;
1511
+    h->intra_pred_c[        INTRA_C_LP] = intra_pred_lp;
1512
+    h->intra_pred_c[     INTRA_C_HORIZ] = intra_pred_horiz;
1513
+    h->intra_pred_c[      INTRA_C_VERT] = intra_pred_vert;
1514
+    h->intra_pred_c[     INTRA_C_PLANE] = intra_pred_plane;
1515
+    h->intra_pred_c[   INTRA_C_LP_LEFT] = intra_pred_lp_left;
1516
+    h->intra_pred_c[    INTRA_C_LP_TOP] = intra_pred_lp_top;
1517
+    h->intra_pred_c[    INTRA_C_DC_128] = intra_pred_dc_128;
1518
+    veccpy(&h->mv[ 7], (vector_t *)&un_mv);
1519
+    veccpy(&h->mv[19], (vector_t *)&un_mv);
1520
+    return 0;
1521
+}
1522
+
1523
+static int cavs_decode_end(AVCodecContext * avctx) {
1524
+    AVSContext *h = (AVSContext *)avctx->priv_data;
1525
+
1526
+    av_free(h->top_qp);
1527
+    av_free(h->top_mv[0]);
1528
+    av_free(h->top_mv[1]);
1529
+    av_free(h->top_pred_Y);
1530
+    av_free(h->top_border_y);
1531
+    av_free(h->top_border_u);
1532
+    av_free(h->top_border_v);
1533
+    av_free(h->col_mv);
1534
+    av_free(h->col_type_base);
1535
+    return 0;
1536
+}
1537
+
1538
+AVCodec cavs_decoder = {
1539
+    "cavs",
1540
+    CODEC_TYPE_VIDEO,
1541
+    CODEC_ID_CAVS,
1542
+    sizeof(AVSContext),
1543
+    cavs_decode_init,
1544
+    NULL,
1545
+    cavs_decode_end,
1546
+    cavs_decode_frame,
1547
+    CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY, //FIXME is this correct ?
1548
+    .flush= ff_cavs_flush,
1549
+};
0 1550
new file mode 100644
... ...
@@ -0,0 +1,613 @@
0
+/*
1
+ * Chinese AVS video (AVS1-P2, JiZhun profile) decoder.
2
+ * Copyright (c) 2006  Stefan Gehrer <stefan.gehrer@gmx.de>
3
+ *
4
+ * This library is free software; you can redistribute it and/or
5
+ * modify it under the terms of the GNU Lesser General Public
6
+ * License as published by the Free Software Foundation; either
7
+ * version 2 of the License, or (at your option) any later version.
8
+ *
9
+ * This library is distributed in the hope that it will be useful,
10
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
+ * Lesser General Public License for more details.
13
+ *
14
+ * You should have received a copy of the GNU Lesser General Public
15
+ * License along with this library; if not, write to the Free Software
16
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17
+ */
18
+
19
/* bitstream start codes (32 bit, including the 0x000001 prefix) */
#define SLICE_MIN_START_CODE    0x00000101
#define SLICE_MAX_START_CODE    0x000001af
#define EXT_START_CODE          0x000001b5
#define USER_START_CODE         0x000001b2
#define SEQ_START_CODE          0x000001b0
#define PIC_I_START_CODE        0x000001b3
#define PIC_PB_START_CODE       0x000001b6

/* single-bit flags, one per neighbour position (A, B, C, D) */
#define A_AVAIL                          1
#define B_AVAIL                          2
#define C_AVAIL                          4
#define D_AVAIL                          8

/* special values stored in vector_t.ref (see un_mv, intra_mv, dir_mv);
 * parenthesized so the sign cannot bind to surrounding tokens */
#define NOT_AVAIL                     (-1)
#define REF_INTRA                     (-2)
#define REF_DIR                       (-3)

/* NOTE(review): equals the number of rltab entries in residual_vlc_t,
 * i.e. presumably the first code number outside the table -- confirm */
#define ESCAPE_CODE                     59

/* per-partition direction flags combined in b_partition_flags[] */
#define FWD0                          0x01
#define FWD1                          0x02
#define BWD0                          0x04
#define BWD1                          0x08
#define SYM0                          0x10
#define SYM1                          0x20

/* offset of the MV_BWD_* entries relative to MV_FWD_* in enum mv_loc_t */
#define MV_BWD_OFFS                     12
/* entries per row of the motion vector cache (see enum mv_loc_t) */
#define MV_STRIDE                        4
46
+
47
/* macroblock types; values are spelled out because B_8X8 is not
 * contiguous with the preceding enumerators */
enum mb_t {
  I_8X8       =  0,
  P_SKIP      =  1,
  P_16X16     =  2,
  P_16X8      =  3,
  P_8X16      =  4,
  P_8X8       =  5,
  B_SKIP      =  6,
  B_DIRECT    =  7,
  B_FWD_16X16 =  8,
  B_BWD_16X16 =  9,
  B_SYM_16X16 = 10,
  B_8X8       = 29
};
61
+
62
/* prediction kinds of a sub-block inside a B_8X8 macroblock */
enum sub_mb_t {
  B_SUB_DIRECT = 0,
  B_SUB_FWD    = 1,
  B_SUB_BWD    = 2,
  B_SUB_SYM    = 3
};
68
+
69
/* luma intra prediction modes; used as indices into intra_pred_l[]
 * (eight entries, 0..7) */
enum intra_luma_t {
  INTRA_L_VERT       = 0,
  INTRA_L_HORIZ      = 1,
  INTRA_L_LP         = 2,
  INTRA_L_DOWN_LEFT  = 3,
  INTRA_L_DOWN_RIGHT = 4,
  INTRA_L_LP_LEFT    = 5,
  INTRA_L_LP_TOP     = 6,
  INTRA_L_DC_128     = 7
};
79
+
80
/* chroma intra prediction modes; used as indices into intra_pred_c[]
 * (seven entries, 0..6) */
enum intra_chroma_t {
  INTRA_C_LP      = 0,
  INTRA_C_HORIZ   = 1,
  INTRA_C_VERT    = 2,
  INTRA_C_PLANE   = 3,
  INTRA_C_LP_LEFT = 4,
  INTRA_C_LP_TOP  = 5,
  INTRA_C_DC_128  = 6,
};
89
+
90
/* motion vector prediction modes */
enum mv_pred_t {
  MV_PRED_MEDIAN   = 0,
  MV_PRED_LEFT     = 1,
  MV_PRED_TOP      = 2,
  MV_PRED_TOPRIGHT = 3,
  MV_PRED_PSKIP    = 4,
  MV_PRED_BSKIP    = 5
};
98
+
99
/* partition sizes */
enum block_t {
  BLK_16X16 = 0,
  BLK_16X8  = 1,
  BLK_8X16  = 2,
  BLK_8X8   = 3
};
105
+
106
+enum mv_loc_t {
107
+  MV_FWD_D3 = 0,
108
+  MV_FWD_B2,
109
+  MV_FWD_B3,
110
+  MV_FWD_C2,
111
+  MV_FWD_A1,
112
+  MV_FWD_X0,
113
+  MV_FWD_X1,
114
+  MV_FWD_A3 = 8,
115
+  MV_FWD_X2,
116
+  MV_FWD_X3,
117
+  MV_BWD_D3 = MV_BWD_OFFS,
118
+  MV_BWD_B2,
119
+  MV_BWD_B3,
120
+  MV_BWD_C2,
121
+  MV_BWD_A1,
122
+  MV_BWD_X0,
123
+  MV_BWD_X1,
124
+  MV_BWD_A3 = MV_BWD_OFFS+8,
125
+  MV_BWD_X2,
126
+  MV_BWD_X3
127
+};
128
+
129
+static const uint8_t b_partition_flags[14] = {
130
+  0,0,0,0,0,
131
+  FWD0|FWD1,
132
+  BWD0|BWD1,
133
+  FWD0|BWD1,
134
+  BWD0|FWD1,
135
+  FWD0|SYM1,
136
+  BWD0|SYM1,
137
+  SYM0|FWD1,
138
+  SYM0|BWD1,
139
+  SYM0|SYM1
140
+};
141
+
142
/* the four entries {4,5,7,8} are the lower-right 2x2 positions of a
 * row-major 3x3 grid (indices 0..8) */
static const uint8_t scan3x3[4] = {4,5,7,8};
143
+
144
+static const uint8_t mv_scan[4] = {
145
+    MV_FWD_X0,MV_FWD_X1,
146
+    MV_FWD_X2,MV_FWD_X3
147
+};
148
+
149
/*
 * Coded block pattern lookup: 64 rows of two values each.
 * NOTE(review): the two columns presumably hold the intra and the inter
 * mapping of a decoded code number -- confirm against the caller.
 */
static const uint8_t cbp_tab[64][2] = {
  {63, 0},{15,15},{31,63},{47,31},{ 0,16},{14,32},{13,47},{11,13},
  { 7,14},{ 5,11},{10,12},{ 8, 5},{12,10},{61, 7},{ 4,48},{55, 3},
  { 1, 2},{ 2, 8},{59, 4},{ 3, 1},{62,61},{ 9,55},{ 6,59},{29,62},
  {45,29},{51,27},{23,23},{39,19},{27,30},{46,28},{53, 9},{30, 6},
  {43,60},{37,21},{60,44},{16,26},{21,51},{28,35},{19,18},{35,20},
  {42,24},{26,53},{44,17},{32,37},{58,39},{24,45},{20,58},{17,43},
  {18,42},{48,46},{22,36},{33,33},{25,34},{49,40},{40,52},{36,49},
  {34,50},{50,56},{52,25},{54,22},{41,54},{56,57},{38,41},{57,38}
};
159
+
160
/*
 * 64-entry lookup: identity for indices 0..42, growing more slowly
 * above that and ending at 51.
 * NOTE(review): presumably maps a luma QP to the chroma QP -- confirm.
 */
static const uint8_t chroma_qp[64] = {
  0,  1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,
  16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
  32,33,34,35,36,37,38,39,40,41,42,42,43,43,44,44,
  45,45,46,46,47,47,48,48,48,49,49,49,50,50,50,51
};
166
+
167
/*
 * 64-entry table of shift amounts, decreasing from 14 to 7.
 * NOTE(review): presumably indexed by QP and paired with dequant_mul[]
 * -- confirm against the dequantization code.
 */
static const uint8_t dequant_shift[64] = {
  14,14,14,14,14,14,14,14,
  13,13,13,13,13,13,13,13,
  13,12,12,12,12,12,12,12,
  11,11,11,11,11,11,11,11,
  11,10,10,10,10,10,10,10,
  10, 9, 9, 9, 9, 9, 9, 9,
  9, 8, 8, 8, 8, 8, 8, 8,
  7, 7, 7, 7, 7, 7, 7, 7
};
177
+
178
/*
 * 64-entry table of 16-bit multipliers.
 * NOTE(review): presumably indexed by QP and paired with
 * dequant_shift[] -- confirm against the dequantization code.
 */
static const uint16_t dequant_mul[64] = {
  32768,36061,38968,42495,46341,50535,55437,60424,
  32932,35734,38968,42495,46177,50535,55109,59933,
  65535,35734,38968,42577,46341,50617,55027,60097,
  32809,35734,38968,42454,46382,50576,55109,60056,
  65535,35734,38968,42495,46320,50515,55109,60076,
  65535,35744,38968,42495,46341,50535,55099,60087,
  65535,35734,38973,42500,46341,50535,55109,60097,
  32771,35734,38965,42497,46341,50535,55109,60099
};
188
+
189
/* one motion vector cache entry: four 16-bit fields */
typedef struct {
    int16_t x;    /* horizontal component */
    int16_t y;    /* vertical component */
    int16_t dist; /* NOTE(review): presumably the temporal distance to the reference -- confirm */
    int16_t ref;  /* reference index, or NOT_AVAIL / REF_INTRA / REF_DIR */
} vector_t;
195
+
196
+// marks block as unavailable, i.e. out of picture
197
+//  or not yet decoded
198
+static const vector_t un_mv    = {0,0,1,NOT_AVAIL};
199
+
200
+//marks block as "no prediction from this direction"
201
+// e.g. forward motion vector in BWD partition
202
+static const vector_t dir_mv   = {0,0,1,REF_DIR};
203
+
204
+//marks block as using intra prediction
205
+static const vector_t intra_mv = {0,0,1,REF_INTRA};
206
+
207
/* one 2D-VLC table used for residual coefficient decoding */
typedef struct residual_vlc_t {
  int8_t rltab[59][3];  /* {level, run, table_inc} per code number */
  int8_t level_add[26]; /* per-run value; -1 in slots above max_run */
  int8_t golomb_order;
  int inc_limit;        /* INT_MAX in the last table of each set */
  int8_t max_run;
} residual_vlc_t;
214
+
215
+static const residual_vlc_t intra_2dvlc[7] = {
216
+  {
217
+    { //level / run / table_inc
218
+      {  1, 0, 1},{ -1, 0, 1},{  1, 1, 1},{ -1, 1, 1},{  1, 2, 1},{ -1, 2, 1},
219
+      {  1, 3, 1},{ -1, 3, 1},{  1, 4, 1},{ -1, 4, 1},{  1, 5, 1},{ -1, 5, 1},
220
+      {  1, 6, 1},{ -1, 6, 1},{  1, 7, 1},{ -1, 7, 1},{  1, 8, 1},{ -1, 8, 1},
221
+      {  1, 9, 1},{ -1, 9, 1},{  1,10, 1},{ -1,10, 1},{  2, 0, 2},{ -2, 0, 2},
222
+      {  1,11, 1},{ -1,11, 1},{  1,12, 1},{ -1,12, 1},{  1,13, 1},{ -1,13, 1},
223
+      {  1,14, 1},{ -1,14, 1},{  2, 1, 2},{ -2, 1, 2},{  1,15, 1},{ -1,15, 1},
224
+      {  1,16, 1},{ -1,16, 1},{  3, 0, 3},{ -3, 0, 3},{  1,17, 1},{ -1,17, 1},
225
+      {  1,18, 1},{ -1,18, 1},{  2, 2, 2},{ -2, 2, 2},{  1,19, 1},{ -1,19, 1},
226
+      {  1,20, 1},{ -1,20, 1},{  2, 3, 2},{ -2, 3, 2},{  1,21, 1},{ -1,21, 1},
227
+      {  2, 4, 2},{ -2, 4, 2},{  1,22, 1},{ -1,22, 1},{  0, 0,-1}
228
+    },
229
+    //level_add
230
+    { 4, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
231
+      2, 2, 2, 2, 2, 2, 2,-1,-1,-1},
232
+    2, //golomb_order
233
+    0, //inc_limit
234
+    22, //max_run
235
+  },{
236
+    { //level / run
237
+      {  1, 0, 0},{ -1, 0, 0},{  1, 1, 0},{ -1, 1, 0},{  2, 0, 1},{ -2, 0, 1},
238
+      {  1, 2, 0},{ -1, 2, 0},{  0, 0, 0},{  1, 3, 0},{ -1, 3, 0},{  1, 4, 0},
239
+      { -1, 4, 0},{  1, 5, 0},{ -1, 5, 0},{  3, 0, 2},{ -3, 0, 2},{  2, 1, 1},
240
+      { -2, 1, 1},{  1, 6, 0},{ -1, 6, 0},{  1, 7, 0},{ -1, 7, 0},{  1, 8, 0},
241
+      { -1, 8, 0},{  2, 2, 1},{ -2, 2, 1},{  4, 0, 2},{ -4, 0, 2},{  1, 9, 0},
242
+      { -1, 9, 0},{  1,10, 0},{ -1,10, 0},{  2, 3, 1},{ -2, 3, 1},{  3, 1, 2},
243
+      { -3, 1, 2},{  1,11, 0},{ -1,11, 0},{  2, 4, 1},{ -2, 4, 1},{  5, 0, 3},
244
+      { -5, 0, 3},{  1,12, 0},{ -1,12, 0},{  2, 5, 1},{ -2, 5, 1},{  1,13, 0},
245
+      { -1,13, 0},{  2, 6, 1},{ -2, 6, 1},{  2, 7, 1},{ -2, 7, 1},{  3, 2, 2},
246
+      { -3, 2, 2},{  6, 0, 3},{ -6, 0, 3},{  1,14, 0},{ -1,14, 0}
247
+    },
248
+    //level_add
249
+    { 7, 4, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2,-1,
250
+      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
251
+    2, //golomb_order
252
+    1, //inc_limit
253
+    14, //max_run
254
+  },{
255
+    { //level / run
256
+      {  1, 0, 0},{ -1, 0, 0},{  2, 0, 0},{ -2, 0, 0},{  1, 1, 0},{ -1, 1, 0},
257
+      {  3, 0, 1},{ -3, 0, 1},{  0, 0, 0},{  1, 2, 0},{ -1, 2, 0},{  2, 1, 0},
258
+      { -2, 1, 0},{  4, 0, 1},{ -4, 0, 1},{  1, 3, 0},{ -1, 3, 0},{  5, 0, 2},
259
+      { -5, 0, 2},{  1, 4, 0},{ -1, 4, 0},{  3, 1, 1},{ -3, 1, 1},{  2, 2, 0},
260
+      { -2, 2, 0},{  1, 5, 0},{ -1, 5, 0},{  6, 0, 2},{ -6, 0, 2},{  2, 3, 0},
261
+      { -2, 3, 0},{  1, 6, 0},{ -1, 6, 0},{  4, 1, 1},{ -4, 1, 1},{  7, 0, 2},
262
+      { -7, 0, 2},{  3, 2, 1},{ -3, 2, 1},{  2, 4, 0},{ -2, 4, 0},{  1, 7, 0},
263
+      { -1, 7, 0},{  2, 5, 0},{ -2, 5, 0},{  8, 0, 3},{ -8, 0, 3},{  1, 8, 0},
264
+      { -1, 8, 0},{  5, 1, 2},{ -5, 1, 2},{  3, 3, 1},{ -3, 3, 1},{  2, 6, 0},
265
+      { -2, 6, 0},{  9, 0, 3},{ -9, 0, 3},{  1, 9, 0},{ -1, 9, 0}
266
+    },
267
+    //level_add
268
+    {10, 6, 4, 4, 3, 3, 3, 2, 2, 2,-1,-1,-1,-1,-1,-1,
269
+     -1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
270
+    2, //golomb_order
271
+    2, //inc_limit
272
+    9, //max_run
273
+  },{
274
+    { //level / run
275
+      {  1, 0, 0},{ -1, 0, 0},{  2, 0, 0},{ -2, 0, 0},{  3, 0, 0},{ -3, 0, 0},
276
+      {  1, 1, 0},{ -1, 1, 0},{  0, 0, 0},{  4, 0, 0},{ -4, 0, 0},{  5, 0, 1},
277
+      { -5, 0, 1},{  2, 1, 0},{ -2, 1, 0},{  1, 2, 0},{ -1, 2, 0},{  6, 0, 1},
278
+      { -6, 0, 1},{  3, 1, 0},{ -3, 1, 0},{  7, 0, 1},{ -7, 0, 1},{  1, 3, 0},
279
+      { -1, 3, 0},{  8, 0, 2},{ -8, 0, 2},{  2, 2, 0},{ -2, 2, 0},{  4, 1, 0},
280
+      { -4, 1, 0},{  1, 4, 0},{ -1, 4, 0},{  9, 0, 2},{ -9, 0, 2},{  5, 1, 1},
281
+      { -5, 1, 1},{  2, 3, 0},{ -2, 3, 0},{ 10, 0, 2},{-10, 0, 2},{  3, 2, 0},
282
+      { -3, 2, 0},{  1, 5, 0},{ -1, 5, 0},{ 11, 0, 3},{-11, 0, 3},{  6, 1, 1},
283
+      { -6, 1, 1},{  1, 6, 0},{ -1, 6, 0},{  2, 4, 0},{ -2, 4, 0},{  3, 3, 0},
284
+      { -3, 3, 0},{ 12, 0, 3},{-12, 0, 3},{  4, 2, 0},{ -4, 2, 0}
285
+    },
286
+    //level_add
287
+    {13, 7, 5, 4, 3, 2, 2,-1,-1,-1 -1,-1,-1,-1,-1,-1,
288
+     -1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
289
+    2, //golomb_order
290
+    4, //inc_limit
291
+    6, //max_run
292
+  },{
293
+    { //level / run
294
+      {  1, 0, 0},{ -1, 0, 0},{  2, 0, 0},{ -2, 0, 0},{  3, 0, 0},{ -3, 0, 0},
295
+      {  0, 0, 0},{  4, 0, 0},{ -4, 0, 0},{  5, 0, 0},{ -5, 0, 0},{  6, 0, 0},
296
+      { -6, 0, 0},{  1, 1, 0},{ -1, 1, 0},{  7, 0, 0},{ -7, 0, 0},{  8, 0, 1},
297
+      { -8, 0, 1},{  2, 1, 0},{ -2, 1, 0},{  9, 0, 1},{ -9, 0, 1},{ 10, 0, 1},
298
+      {-10, 0, 1},{  1, 2, 0},{ -1, 2, 0},{  3, 1, 0},{ -3, 1, 0},{ 11, 0, 2},
299
+      {-11, 0, 2},{  4, 1, 0},{ -4, 1, 0},{ 12, 0, 2},{-12, 0, 2},{ 13, 0, 2},
300
+      {-13, 0, 2},{  5, 1, 0},{ -5, 1, 0},{  1, 3, 0},{ -1, 3, 0},{  2, 2, 0},
301
+      { -2, 2, 0},{ 14, 0, 2},{-14, 0, 2},{  6, 1, 0},{ -6, 1, 0},{ 15, 0, 2},
302
+      {-15, 0, 2},{ 16, 0, 2},{-16, 0, 2},{  3, 2, 0},{ -3, 2, 0},{  1, 4, 0},
303
+      { -1, 4, 0},{  7, 1, 0},{ -7, 1, 0},{ 17, 0, 2},{-17, 0, 2},
304
+    },
305
+    //level_add
306
+    {18, 8, 4, 2, 2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
307
+     -1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
308
+    2, //golomb_order
309
+    7, //inc_limit
310
+    4, //max_run
311
+  },{
312
+    { //level / run
313
+      {  0, 0, 0},{  1, 0, 0},{ -1, 0, 0},{  2, 0, 0},{ -2, 0, 0},{  3, 0, 0},
314
+      { -3, 0, 0},{  4, 0, 0},{ -4, 0, 0},{  5, 0, 0},{ -5, 0, 0},{  6, 0, 0},
315
+      { -6, 0, 0},{  7, 0, 0},{ -7, 0, 0},{  8, 0, 0},{ -8, 0, 0},{  9, 0, 0},
316
+      { -9, 0, 0},{ 10, 0, 0},{-10, 0, 0},{  1, 1, 0},{ -1, 1, 0},{ 11, 0, 1},
317
+      {-11, 0, 1},{ 12, 0, 1},{-12, 0, 1},{ 13, 0, 1},{-13, 0, 1},{  2, 1, 0},
318
+      { -2, 1, 0},{ 14, 0, 1},{-14, 0, 1},{ 15, 0, 1},{-15, 0, 1},{  3, 1, 0},
319
+      { -3, 1, 0},{ 16, 0, 1},{-16, 0, 1},{  1, 2, 0},{ -1, 2, 0},{ 17, 0, 1},
320
+      {-17, 0, 1},{  4, 1, 0},{ -4, 1, 0},{ 18, 0, 1},{-18, 0, 1},{  5, 1, 0},
321
+      { -5, 1, 0},{ 19, 0, 1},{-19, 0, 1},{ 20, 0, 1},{-20, 0, 1},{  6, 1, 0},
322
+      { -6, 1, 0},{ 21, 0, 1},{-21, 0, 1},{  2, 2, 0},{ -2, 2, 0},
323
+    },
324
+    //level_add
325
+    {22, 7, 3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
326
+     -1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
327
+    2, //golomb_order
328
+    10, //inc_limit
329
+    2, //max_run
330
+  },{
331
+    { //level / run
332
+      {  0, 0, 0},{  1, 0, 0},{ -1, 0, 0},{  2, 0, 0},{ -2, 0, 0},{  3, 0, 0},
333
+      { -3, 0, 0},{  4, 0, 0},{ -4, 0, 0},{  5, 0, 0},{ -5, 0, 0},{  6, 0, 0},
334
+      { -6, 0, 0},{  7, 0, 0},{ -7, 0, 0},{  8, 0, 0},{ -8, 0, 0},{  9, 0, 0},
335
+      { -9, 0, 0},{ 10, 0, 0},{-10, 0, 0},{ 11, 0, 0},{-11, 0, 0},{ 12, 0, 0},
336
+      {-12, 0, 0},{ 13, 0, 0},{-13, 0, 0},{ 14, 0, 0},{-14, 0, 0},{ 15, 0, 0},
337
+      {-15, 0, 0},{ 16, 0, 0},{-16, 0, 0},{  1, 1, 0},{ -1, 1, 0},{ 17, 0, 0},
338
+      {-17, 0, 0},{ 18, 0, 0},{-18, 0, 0},{ 19, 0, 0},{-19, 0, 0},{ 20, 0, 0},
339
+      {-20, 0, 0},{ 21, 0, 0},{-21, 0, 0},{  2, 1, 0},{ -2, 1, 0},{ 22, 0, 0},
340
+      {-22, 0, 0},{ 23, 0, 0},{-23, 0, 0},{ 24, 0, 0},{-24, 0, 0},{ 25, 0, 0},
341
+      {-25, 0, 0},{  3, 1, 0},{ -3, 1, 0},{ 26, 0, 0},{-26, 0, 0}
342
+    },
343
+    //level_add
344
+    {27, 4,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
345
+     -1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
346
+    2, //golomb_order
347
+    INT_MAX, //inc_limit
348
+    1, //max_run
349
+  }
350
+};
351
+
352
+static const residual_vlc_t inter_2dvlc[7] = {
353
+  {
354
+    { //level / run
355
+      {  1, 0, 1},{ -1, 0, 1},{  1, 1, 1},{ -1, 1, 1},{  1, 2, 1},{ -1, 2, 1},
356
+      {  1, 3, 1},{ -1, 3, 1},{  1, 4, 1},{ -1, 4, 1},{  1, 5, 1},{ -1, 5, 1},
357
+      {  1, 6, 1},{ -1, 6, 1},{  1, 7, 1},{ -1, 7, 1},{  1, 8, 1},{ -1, 8, 1},
358
+      {  1, 9, 1},{ -1, 9, 1},{  1,10, 1},{ -1,10, 1},{  1,11, 1},{ -1,11, 1},
359
+      {  1,12, 1},{ -1,12, 1},{  2, 0, 2},{ -2, 0, 2},{  1,13, 1},{ -1,13, 1},
360
+      {  1,14, 1},{ -1,14, 1},{  1,15, 1},{ -1,15, 1},{  1,16, 1},{ -1,16, 1},
361
+      {  1,17, 1},{ -1,17, 1},{  1,18, 1},{ -1,18, 1},{  3, 0, 3},{ -3, 0, 3},
362
+      {  1,19, 1},{ -1,19, 1},{  1,20, 1},{ -1,20, 1},{  2, 1, 2},{ -2, 1, 2},
363
+      {  1,21, 1},{ -1,21, 1},{  1,22, 1},{ -1,22, 1},{  1,23, 1},{ -1,23, 1},
364
+      {  1,24, 1},{ -1,24, 1},{  1,25, 1},{ -1,25, 1},{  0, 0,-1}
365
+    },
366
+    //level_add
367
+    { 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
368
+      2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
369
+    3, //golomb_order
370
+    0, //inc_limit
371
+    25 //max_run
372
+  },{
373
+    { //level / run
374
+      {  1, 0, 0},{ -1, 0, 0},{  0, 0, 0},{  1, 1, 0},{ -1, 1, 0},{  1, 2, 0},
375
+      { -1, 2, 0},{  1, 3, 0},{ -1, 3, 0},{  1, 4, 0},{ -1, 4, 0},{  1, 5, 0},
376
+      { -1, 5, 0},{  2, 0, 1},{ -2, 0, 1},{  1, 6, 0},{ -1, 6, 0},{  1, 7, 0},
377
+      { -1, 7, 0},{  1, 8, 0},{ -1, 8, 0},{  1, 9, 0},{ -1, 9, 0},{  2, 1, 1},
378
+      { -2, 1, 1},{  1,10, 0},{ -1,10, 0},{  1,11, 0},{ -1,11, 0},{  3, 0, 2},
379
+      { -3, 0, 2},{  1,12, 0},{ -1,12, 0},{  1,13, 0},{ -1,13, 0},{  2, 2, 1},
380
+      { -2, 2, 1},{  1,14, 0},{ -1,14, 0},{  2, 3, 1},{ -2, 3, 1},{  1,15, 0},
381
+      { -1,15, 0},{  2, 4, 1},{ -2, 4, 1},{  1,16, 0},{ -1,16, 0},{  4, 0, 3},
382
+      { -4, 0, 3},{  2, 5, 1},{ -2, 5, 1},{  1,17, 0},{ -1,17, 0},{  1,18, 0},
383
+      { -1,18, 0},{  2, 6, 1},{ -2, 6, 1},{  3, 1, 2},{ -3, 1, 2},
384
+    },
385
+    //level_add
386
+    { 5, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2,
387
+      2, 2, 2,-1,-1,-1,-1,-1,-1,-1},
388
+    2, //golomb_order
389
+    1, //inc_limit
390
+    18 //max_run
391
+  },{
392
+    { //level / run
393
+      {  1, 0, 0},{ -1, 0, 0},{  0, 0, 0},{  1, 1, 0},{ -1, 1, 0},{  2, 0, 0},
394
+      { -2, 0, 0},{  1, 2, 0},{ -1, 2, 0},{  1, 3, 0},{ -1, 3, 0},{  3, 0, 1},
395
+      { -3, 0, 1},{  2, 1, 0},{ -2, 1, 0},{  1, 4, 0},{ -1, 4, 0},{  1, 5, 0},
396
+      { -1, 5, 0},{  1, 6, 0},{ -1, 6, 0},{  2, 2, 0},{ -2, 2, 0},{  4, 0, 2},
397
+      { -4, 0, 2},{  1, 7, 0},{ -1, 7, 0},{  3, 1, 1},{ -3, 1, 1},{  2, 3, 0},
398
+      { -2, 3, 0},{  1, 8, 0},{ -1, 8, 0},{  1, 9, 0},{ -1, 9, 0},{  5, 0, 2},
399
+      { -5, 0, 2},{  2, 4, 0},{ -2, 4, 0},{  1,10, 0},{ -1,10, 0},{  2, 5, 0},
400
+      { -2, 5, 0},{  1,11, 0},{ -1,11, 0},{  3, 2, 1},{ -3, 2, 1},{  6, 0, 2},
401
+      { -6, 0, 2},{  4, 1, 2},{ -4, 1, 2},{  1,12, 0},{ -1,12, 0},{  2, 6, 0},
402
+      { -2, 6, 0},{  3, 3, 1},{ -3, 3, 1},{  1,13, 0},{ -1,13, 0},
403
+    },
404
+    //level_add
405
+    { 7, 5, 4, 4, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2,-1,-1,
406
+      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
407
+    2, //golomb_order
408
+    2, //inc_limit
409
+    13 //max_run
410
+  },{
411
+    { //level / run
412
+      {  1, 0, 0},{ -1, 0, 0},{  0, 0, 0},{  2, 0, 0},{ -2, 0, 0},{  1, 1, 0},
413
+      { -1, 1, 0},{  3, 0, 0},{ -3, 0, 0},{  1, 2, 0},{ -1, 2, 0},{  2, 1, 0},
414
+      { -2, 1, 0},{  4, 0, 1},{ -4, 0, 1},{  1, 3, 0},{ -1, 3, 0},{  5, 0, 1},
415
+      { -5, 0, 1},{  1, 4, 0},{ -1, 4, 0},{  3, 1, 0},{ -3, 1, 0},{  2, 2, 0},
416
+      { -2, 2, 0},{  1, 5, 0},{ -1, 5, 0},{  6, 0, 1},{ -6, 0, 1},{  2, 3, 0},
417
+      { -2, 3, 0},{  1, 6, 0},{ -1, 6, 0},{  4, 1, 1},{ -4, 1, 1},{  7, 0, 2},
418
+      { -7, 0, 2},{  3, 2, 0},{ -3, 2, 0},{  1, 7, 0},{ -1, 7, 0},{  2, 4, 0},
419
+      { -2, 4, 0},{  8, 0, 2},{ -8, 0, 2},{  1, 8, 0},{ -1, 8, 0},{  3, 3, 0},
420
+      { -3, 3, 0},{  2, 5, 0},{ -2, 5, 0},{  5, 1, 1},{ -5, 1, 1},{  1, 9, 0},
421
+      { -1, 9, 0},{  9, 0, 2},{ -9, 0, 2},{  4, 2, 1},{ -4, 2, 1},
422
+    },
423
+    //level_add
424
+    {10, 6, 5, 4, 3, 3, 2, 2, 2, 2,-1,-1,-1,-1,-1,-1,
425
+     -1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
426
+    2, //golomb_order
427
+    3, //inc_limit
428
+    9 //max_run
429
+  },{
430
+    { //level / run
431
+      {  1, 0, 0},{ -1, 0, 0},{  0, 0, 0},{  2, 0, 0},{ -2, 0, 0},{  3, 0, 0},
432
+      { -3, 0, 0},{  1, 1, 0},{ -1, 1, 0},{  4, 0, 0},{ -4, 0, 0},{  5, 0, 0},
433
+      { -5, 0, 0},{  2, 1, 0},{ -2, 1, 0},{  1, 2, 0},{ -1, 2, 0},{  6, 0, 0},
434
+      { -6, 0, 0},{  3, 1, 0},{ -3, 1, 0},{  7, 0, 1},{ -7, 0, 1},{  1, 3, 0},
435
+      { -1, 3, 0},{  8, 0, 1},{ -8, 0, 1},{  2, 2, 0},{ -2, 2, 0},{  4, 1, 0},
436
+      { -4, 1, 0},{  1, 4, 0},{ -1, 4, 0},{  9, 0, 1},{ -9, 0, 1},{  5, 1, 0},
437
+      { -5, 1, 0},{  2, 3, 0},{ -2, 3, 0},{  1, 5, 0},{ -1, 5, 0},{ 10, 0, 2},
438
+      {-10, 0, 2},{  3, 2, 0},{ -3, 2, 0},{ 11, 0, 2},{-11, 0, 2},{  1, 6, 0},
439
+      { -1, 6, 0},{  6, 1, 0},{ -6, 1, 0},{  3, 3, 0},{ -3, 3, 0},{  2, 4, 0},
440
+      { -2, 4, 0},{ 12, 0, 2},{-12, 0, 2},{  4, 2, 0},{ -4, 2, 0},
441
+    },
442
+    //level_add
443
+    {13, 7, 5, 4, 3, 2, 2,-1,-1,-1,-1,-1,-1,-1,-1,-1,
444
+     -1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
445
+    2, //golomb_order
446
+    6, //inc_limit
447
+    6 //max_run
448
+  },{
449
+    { //level / run
450
+      {  0, 0, 0},{  1, 0, 0},{ -1, 0, 0},{  2, 0, 0},{ -2, 0, 0},{  3, 0, 0},
451
+      { -3, 0, 0},{  4, 0, 0},{ -4, 0, 0},{  5, 0, 0},{ -5, 0, 0},{  1, 1, 0},
452
+      { -1, 1, 0},{  6, 0, 0},{ -6, 0, 0},{  7, 0, 0},{ -7, 0, 0},{  8, 0, 0},
453
+      { -8, 0, 0},{  2, 1, 0},{ -2, 1, 0},{  9, 0, 0},{ -9, 0, 0},{  1, 2, 0},
454
+      { -1, 2, 0},{ 10, 0, 1},{-10, 0, 1},{  3, 1, 0},{ -3, 1, 0},{ 11, 0, 1},
455
+      {-11, 0, 1},{  4, 1, 0},{ -4, 1, 0},{ 12, 0, 1},{-12, 0, 1},{  1, 3, 0},
456
+      { -1, 3, 0},{  2, 2, 0},{ -2, 2, 0},{ 13, 0, 1},{-13, 0, 1},{  5, 1, 0},
457
+      { -5, 1, 0},{ 14, 0, 1},{-14, 0, 1},{  6, 1, 0},{ -6, 1, 0},{  1, 4, 0},
458
+      { -1, 4, 0},{ 15, 0, 1},{-15, 0, 1},{  3, 2, 0},{ -3, 2, 0},{ 16, 0, 1},
459
+      {-16, 0, 1},{  2, 3, 0},{ -2, 3, 0},{  7, 1, 0},{ -7, 1, 0},
460
+    },
461
+    //level_add
462
+    {17, 8, 4, 3, 2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
463
+     -1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
464
+    2, //golomb_order
465
+    9, //inc_limit
466
+    4 //max_run
467
+  },{
468
+    { //level / run
469
+      {  0, 0, 0},{  1, 0, 0},{ -1, 0, 0},{  2, 0, 0},{ -2, 0, 0},{  3, 0, 0},
470
+      { -3, 0, 0},{  4, 0, 0},{ -4, 0, 0},{  5, 0, 0},{ -5, 0, 0},{  6, 0, 0},
471
+      { -6, 0, 0},{  7, 0, 0},{ -7, 0, 0},{  1, 1, 0},{ -1, 1, 0},{  8, 0, 0},
472
+      { -8, 0, 0},{  9, 0, 0},{ -9, 0, 0},{ 10, 0, 0},{-10, 0, 0},{ 11, 0, 0},
473
+      {-11, 0, 0},{ 12, 0, 0},{-12, 0, 0},{  2, 1, 0},{ -2, 1, 0},{ 13, 0, 0},
474
+      {-13, 0, 0},{  1, 2, 0},{ -1, 2, 0},{ 14, 0, 0},{-14, 0, 0},{ 15, 0, 0},
475
+      {-15, 0, 0},{  3, 1, 0},{ -3, 1, 0},{ 16, 0, 0},{-16, 0, 0},{ 17, 0, 0},
476
+      {-17, 0, 0},{ 18, 0, 0},{-18, 0, 0},{  4, 1, 0},{ -4, 1, 0},{ 19, 0, 0},
477
+      {-19, 0, 0},{ 20, 0, 0},{-20, 0, 0},{  2, 2, 0},{ -2, 2, 0},{  1, 3, 0},
478
+      { -1, 3, 0},{  5, 1, 0},{ -5, 1, 0},{ 21, 0, 0},{-21, 0, 0},
479
+    },
480
+    //level_add
481
+    {22, 6, 3, 2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
482
+     -1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
483
+    2, //golomb_order
484
+    INT_MAX, //inc_limit
485
+    3 //max_run
486
+  }
487
+};
488
+
489
+static const residual_vlc_t chroma_2dvlc[5] = {
490
+  {
491
+    { //level / run
492
+      {  1, 0, 1},{ -1, 0, 1},{  1, 1, 1},{ -1, 1, 1},{  1, 2, 1},{ -1, 2, 1},
493
+      {  1, 3, 1},{ -1, 3, 1},{  1, 4, 1},{ -1, 4, 1},{  1, 5, 1},{ -1, 5, 1},
494
+      {  1, 6, 1},{ -1, 6, 1},{  2, 0, 2},{ -2, 0, 2},{  1, 7, 1},{ -1, 7, 1},
495
+      {  1, 8, 1},{ -1, 8, 1},{  1, 9, 1},{ -1, 9, 1},{  1,10, 1},{ -1,10, 1},
496
+      {  1,11, 1},{ -1,11, 1},{  1,12, 1},{ -1,12, 1},{  1,13, 1},{ -1,13, 1},
497
+      {  1,14, 1},{ -1,14, 1},{  3, 0, 3},{ -3, 0, 3},{  1,15, 1},{ -1,15, 1},
498
+      {  1,16, 1},{ -1,16, 1},{  1,17, 1},{ -1,17, 1},{  1,18, 1},{ -1,18, 1},
499
+      {  1,19, 1},{ -1,19, 1},{  1,20, 1},{ -1,20, 1},{  1,21, 1},{ -1,21, 1},
500
+      {  2, 1, 2},{ -2, 1, 2},{  1,22, 1},{ -1,22, 1},{  1,23, 1},{ -1,23, 1},
501
+      {  1,24, 1},{ -1,24, 1},{  4, 0, 3},{ -4, 0, 3},{  0, 0,-1}
502
+    },
503
+    //level_add
504
+    { 5, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
505
+      2, 2, 2, 2, 2, 2, 2, 2, 2,-1},
506
+    2, //golomb_order
507
+    0, //inc_limit
508
+    24, //max_run
509
+  },{
510
+    { //level / run
511
+      {  0, 0, 0},{  1, 0, 0},{ -1, 0, 0},{  1, 1, 0},{ -1, 1, 0},{  2, 0, 1},
512
+      { -2, 0, 1},{  1, 2, 0},{ -1, 2, 0},{  1, 3, 0},{ -1, 3, 0},{  1, 4, 0},
513
+      { -1, 4, 0},{  1, 5, 0},{ -1, 5, 0},{  3, 0, 2},{ -3, 0, 2},{  1, 6, 0},
514
+      { -1, 6, 0},{  1, 7, 0},{ -1, 7, 0},{  2, 1, 1},{ -2, 1, 1},{  1, 8, 0},
515
+      { -1, 8, 0},{  1, 9, 0},{ -1, 9, 0},{  1,10, 0},{ -1,10, 0},{  4, 0, 2},
516
+      { -4, 0, 2},{  1,11, 0},{ -1,11, 0},{  1,12, 0},{ -1,12, 0},{  1,13, 0},
517
+      { -1,13, 0},{  2, 2, 1},{ -2, 2, 1},{  1,14, 0},{ -1,14, 0},{  2, 3, 1},
518
+      { -2, 3, 1},{  5, 0, 3},{ -5, 0, 3},{  3, 1, 2},{ -3, 1, 2},{  1,15, 0},
519
+      { -1,15, 0},{  1,16, 0},{ -1,16, 0},{  1,17, 0},{ -1,17, 0},{  2, 4, 1},
520
+      { -2, 4, 1},{  1,18, 0},{ -1,18, 0},{  1,19, 0},{ -1,19, 0},
521
+    },
522
+    //level_add
523
+    { 6, 4, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
524
+      2, 2, 2, 2,-1,-1,-1,-1,-1,-1},
525
+    0, //golomb_order
526
+    1, //inc_limit
527
+    19, //max_run
528
+  },{
529
+    { //level / run
530
+      {  1, 0, 0},{ -1, 0, 0},{  0, 0, 0},{  2, 0, 0},{ -2, 0, 0},{  1, 1, 0},
531
+      { -1, 1, 0},{  3, 0, 1},{ -3, 0, 1},{  1, 2, 0},{ -1, 2, 0},{  4, 0, 1},
532
+      { -4, 0, 1},{  2, 1, 0},{ -2, 1, 0},{  1, 3, 0},{ -1, 3, 0},{  5, 0, 2},
533
+      { -5, 0, 2},{  1, 4, 0},{ -1, 4, 0},{  3, 1, 1},{ -3, 1, 1},{  2, 2, 0},
534
+      { -2, 2, 0},{  1, 5, 0},{ -1, 5, 0},{  6, 0, 2},{ -6, 0, 2},{  1, 6, 0},
535
+      { -1, 6, 0},{  2, 3, 0},{ -2, 3, 0},{  7, 0, 2},{ -7, 0, 2},{  1, 7, 0},
536
+      { -1, 7, 0},{  4, 1, 1},{ -4, 1, 1},{  1, 8, 0},{ -1, 8, 0},{  3, 2, 1},
537
+      { -3, 2, 1},{  2, 4, 0},{ -2, 4, 0},{  2, 5, 0},{ -2, 5, 0},{  8, 0, 2},
538
+      { -8, 0, 2},{  1, 9, 0},{ -1, 9, 0},{  1,10, 0},{ -1,10, 0},{  9, 0, 2},
539
+      { -9, 0, 2},{  5, 1, 2},{ -5, 1, 2},{  3, 3, 1},{ -3, 3, 1},
540
+    },
541
+    //level_add
542
+    {10, 6, 4, 4, 3, 3, 2, 2, 2, 2, 2,-1,-1,-1,-1,-1,
543
+     -1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
544
+    1, //golomb_order
545
+    2, //inc_limit
546
+    10, //max_run
547
+  },{
548
+    { //level / run
549
+      {  0, 0, 0},{  1, 0, 0},{ -1, 0, 0},{  2, 0, 0},{ -2, 0, 0},{  3, 0, 0},
550
+      { -3, 0, 0},{  4, 0, 0},{ -4, 0, 0},{  1, 1, 0},{ -1, 1, 0},{  5, 0, 1},
551
+      { -5, 0, 1},{  2, 1, 0},{ -2, 1, 0},{  6, 0, 1},{ -6, 0, 1},{  1, 2, 0},
552
+      { -1, 2, 0},{  7, 0, 1},{ -7, 0, 1},{  3, 1, 0},{ -3, 1, 0},{  8, 0, 1},
553
+      { -8, 0, 1},{  1, 3, 0},{ -1, 3, 0},{  2, 2, 0},{ -2, 2, 0},{  9, 0, 1},
554
+      { -9, 0, 1},{  4, 1, 0},{ -4, 1, 0},{  1, 4, 0},{ -1, 4, 0},{ 10, 0, 1},
555
+      {-10, 0, 1},{  3, 2, 0},{ -3, 2, 0},{  5, 1, 1},{ -5, 1, 1},{  2, 3, 0},
556
+      { -2, 3, 0},{ 11, 0, 1},{-11, 0, 1},{  1, 5, 0},{ -1, 5, 0},{ 12, 0, 1},
557
+      {-12, 0, 1},{  1, 6, 0},{ -1, 6, 0},{  6, 1, 1},{ -6, 1, 1},{ 13, 0, 1},
558
+      {-13, 0, 1},{  2, 4, 0},{ -2, 4, 0},{  1, 7, 0},{ -1, 7, 0},
559
+    },
560
+    //level_add
561
+    {14, 7, 4, 3, 3, 2, 2, 2,-1,-1,-1,-1,-1,-1,-1,-1,
562
+     -1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
563
+    1, //golomb_order
564
+    4, //inc_limit
565
+    7, //max_run
566
+  },{
567
+    { //level / run
568
+      {  0, 0, 0},{  1, 0, 0},{ -1, 0, 0},{  2, 0, 0},{ -2, 0, 0},{  3, 0, 0},
569
+      { -3, 0, 0},{  4, 0, 0},{ -4, 0, 0},{  5, 0, 0},{ -5, 0, 0},{  6, 0, 0},
570
+      { -6, 0, 0},{  7, 0, 0},{ -7, 0, 0},{  8, 0, 0},{ -8, 0, 0},{  1, 1, 0},
571
+      { -1, 1, 0},{  9, 0, 0},{ -9, 0, 0},{ 10, 0, 0},{-10, 0, 0},{ 11, 0, 0},
572
+      {-11, 0, 0},{  2, 1, 0},{ -2, 1, 0},{ 12, 0, 0},{-12, 0, 0},{ 13, 0, 0},
573
+      {-13, 0, 0},{  3, 1, 0},{ -3, 1, 0},{ 14, 0, 0},{-14, 0, 0},{  1, 2, 0},
574
+      { -1, 2, 0},{ 15, 0, 0},{-15, 0, 0},{  4, 1, 0},{ -4, 1, 0},{ 16, 0, 0},
575
+      {-16, 0, 0},{ 17, 0, 0},{-17, 0, 0},{  5, 1, 0},{ -5, 1, 0},{  1, 3, 0},
576
+      { -1, 3, 0},{  2, 2, 0},{ -2, 2, 0},{ 18, 0, 0},{-18, 0, 0},{  6, 1, 0},
577
+      { -6, 1, 0},{ 19, 0, 0},{-19, 0, 0},{  1, 4, 0},{ -1, 4, 0},
578
+    },
579
+    //level_add
580
+    {20, 7, 3, 2, 2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
581
+     -1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
582
+    0, //golomb_order
583
+    INT_MAX, //inc_limit
584
+    4, //max_run
585
+  }
586
+};
587
+
588
/*
 * 64-entry threshold table, non-decreasing from 0 to 64.
 * NOTE(review): presumably the loop filter "alpha" threshold indexed by
 * a QP-derived value -- confirm against the deblocking code.
 */
static const uint8_t alpha_tab[64] = {
   0,  0,  0,  0,  0,  0,  1,  1,  1,  1,  1,  2,  2,  2,  3,  3,
   4,  4,  5,  5,  6,  7,  8,  9, 10, 11, 12, 13, 15, 16, 18, 20,
  22, 24, 26, 28, 30, 33, 33, 35, 35, 36, 37, 37, 39, 39, 42, 44,
  46, 48, 50, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64
};
594
+
595
/*
 * 64-entry threshold table, non-decreasing from 0 to 27.
 * NOTE(review): presumably the loop filter "beta" threshold indexed by
 * a QP-derived value -- confirm against the deblocking code.
 */
static const uint8_t beta_tab[64] = {
   0,  0,  0,  0,  0,  0,  1,  1,  1,  1,  1,  1,  1,  2,  2,  2,
   2,  2,  3,  3,  3,  3,  4,  4,  4,  4,  5,  5,  5,  5,  6,  6,
   6,  7,  7,  7,  8,  8,  8,  9,  9, 10, 10, 11, 11, 12, 13, 14,
  15, 16, 17, 18, 19, 20, 21, 22, 23, 23, 24, 24, 25, 25, 26, 27
};
601
+
602
/*
 * 64-entry table, non-decreasing from 0 to 9.
 * NOTE(review): presumably the loop filter clipping value ("tc")
 * indexed by a QP-derived value -- confirm against the deblocking code.
 */
static const uint8_t tc_tab[64] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
  2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4,
  5, 5, 5, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 9, 9, 9
};
608
+
609
/*
 * Intra prediction mode remapping tables. The luma tables have one
 * entry per enum intra_luma_t value (8), the chroma tables one per
 * enum intra_chroma_t value (7).
 * NOTE(review): presumably each entry is the mode to use instead when
 * the left / top neighbour is unavailable, with -1 marking a mode that
 * cannot be used in that case -- confirm against the caller.
 */
static const int_fast8_t left_modifier_l[8] = { 0,-1, 6,-1,-1, 7, 6, 7};
static const int_fast8_t top_modifier_l[8]  = {-1, 1, 5,-1,-1, 5, 7, 7};
static const int_fast8_t left_modifier_c[7] = { 5,-1, 2,-1, 6, 5, 6};
static const int_fast8_t top_modifier_c[7]  = { 4, 1,-1,-1, 4, 6, 6};
0 613
new file mode 100644
... ...
@@ -0,0 +1,511 @@
0
+/*
1
+ * Chinese AVS video (AVS1-P2, JiZhun profile) decoder.
2
+ *
3
+ * DSP functions
4
+ *
5
+ * Copyright (c) 2006  Stefan Gehrer <stefan.gehrer@gmx.de>
6
+ *
7
+ * This library is free software; you can redistribute it and/or
8
+ * modify it under the terms of the GNU Lesser General Public
9
+ * License as published by the Free Software Foundation; either
10
+ * version 2 of the License, or (at your option) any later version.
11
+ *
12
+ * This library is distributed in the hope that it will be useful,
13
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
+ * Lesser General Public License for more details.
16
+ *
17
+ * You should have received a copy of the GNU Lesser General Public
18
+ * License along with this library; if not, write to the Free Software
19
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20
+ */
21
+
22
+#include <stdio.h>
23
+#include "dsputil.h"
24
+#include "cavsdsp.h"
25
+
26
+/*****************************************************************************
27
+ *
28
+ * in-loop deblocking filter
29
+ *
30
+ ****************************************************************************/
31
+
32
/*
 * Shorthand for the six samples straddling a block edge.  The macros expand
 * in terms of the enclosing function's `p0_p` (address of the first sample
 * on the Q side) and `stride` (distance between neighbouring samples taken
 * across the edge).  P0 and Q0 are the two samples adjacent to the edge.
 */
#define P2 p0_p[-3 * stride]
#define P1 p0_p[-2 * stride]
#define P0 p0_p[-1 * stride]
#define Q0 p0_p[ 0 * stride]
#define Q1 p0_p[ 1 * stride]
#define Q2 p0_p[ 2 * stride]
38
+
39
/**
 * Luma edge filter used by the cavs_filter_l*_c functions when bs1 == 2.
 * Filters one line of samples crossing the edge, in place.
 *
 * @param p0_p   address of the first sample on the Q side of the edge
 * @param stride distance between neighbouring samples across the edge
 * @param alpha  threshold on the step |p0 - q0| across the edge
 * @param beta   threshold on the sample differences on each side
 */
static inline void loop_filter_l2(uint8_t *p0_p, int stride, int alpha, int beta)
{
    uint8_t *const p = p0_p - stride;   /* p[0] is the P-side sample next to the edge */
    uint8_t *const q = p0_p;            /* q[0] is the Q-side sample next to the edge */
    const int p0 = p[0];
    const int q0 = q[0];
    int p1, q1, sum, inner_alpha;

    /* leave the edge untouched unless all three activity checks pass */
    if (abs(p0 - q0) >= alpha ||
        abs(p[-stride] - p0) >= beta ||
        abs(q[stride] - q0) >= beta)
        return;

    p1          = p[-stride];
    q1          = q[stride];
    sum         = p0 + q0 + 2;          /* shared term incl. rounding offset */
    inner_alpha = (alpha >> 2) + 2;     /* tighter limit for the two-sample update */

    if (abs(p[-2 * stride] - p0) < beta && abs(p0 - q0) < inner_alpha) {
        p[0]       = (p1 + p0 + sum) >> 2;
        p[-stride] = (2 * p1 + sum) >> 2;
    } else {
        p[0] = (2 * p1 + sum) >> 2;
    }

    if (abs(q[2 * stride] - q0) < beta && abs(q0 - p0) < inner_alpha) {
        q[0]      = (q1 + q0 + sum) >> 2;
        q[stride] = (2 * q1 + sum) >> 2;
    } else {
        q[0] = (2 * q1 + sum) >> 2;
    }
}
58
+
59
/**
 * Luma edge filter used by the cavs_filter_l*_c functions when the
 * boundary-strength flag is non-zero but bs1 != 2.  Filters one line of
 * samples crossing the edge, in place; every correction is limited to
 * the range [-tc, tc].
 *
 * @param p0_p   address of the first sample on the Q side of the edge
 * @param stride distance between neighbouring samples across the edge
 * @param alpha  threshold on the step |p0 - q0| across the edge
 * @param beta   threshold on the sample differences on each side
 * @param tc     clipping limit for the applied corrections
 */
static inline void loop_filter_l1(uint8_t *p0_p, int stride, int alpha, int beta, int tc)
{
    uint8_t *const p = p0_p - stride;   /* p[0] is the P-side sample next to the edge */
    uint8_t *const q = p0_p;            /* q[0] is the Q-side sample next to the edge */
    const int p0 = p[0];
    const int q0 = q[0];
    int step, new_p0, new_q0;

    if (abs(p0 - q0) >= alpha ||
        abs(p[-stride] - p0) >= beta ||
        abs(q[stride] - q0) >= beta)
        return;

    /* correct the two samples adjacent to the edge */
    step   = clip(((q0 - p0) * 3 + p[-stride] - q[stride] + 4) >> 3, -tc, tc);
    new_p0 = clip_uint8(p0 + step);
    new_q0 = clip_uint8(q0 - step);
    p[0]   = new_p0;
    q[0]   = new_q0;

    /* the second sample on each side is corrected using the already
     * filtered edge samples, but gated on the unfiltered ones */
    if (abs(p[-2 * stride] - p0) < beta) {
        step = clip(((new_p0 - p[-stride]) * 3 + p[-2 * stride] - new_q0 + 4) >> 3, -tc, tc);
        p[-stride] = clip_uint8(p[-stride] + step);
    }
    if (abs(q[2 * stride] - q0) < beta) {
        step = clip(((q[stride] - new_q0) * 3 + new_p0 - q[2 * stride] + 4) >> 3, -tc, tc);
        q[stride] = clip_uint8(q[stride] - step);
    }
}
77
+
78
/**
 * Chroma edge filter used by the cavs_filter_c*_c functions when bs1 == 2.
 * Only the two samples adjacent to the edge are modified.
 *
 * @param p0_p   address of the first sample on the Q side of the edge
 * @param stride distance between neighbouring samples across the edge
 * @param alpha  threshold on the step |p0 - q0| across the edge
 * @param beta   threshold on the sample differences on each side
 */
static inline void loop_filter_c2(uint8_t *p0_p, int stride, int alpha, int beta)
{
    uint8_t *const p = p0_p - stride;   /* p[0] is the P-side sample next to the edge */
    uint8_t *const q = p0_p;            /* q[0] is the Q-side sample next to the edge */
    const int p0 = p[0];
    const int q0 = q[0];
    int p1, q1, sum, inner_alpha;

    if (abs(p0 - q0) >= alpha ||
        abs(p[-stride] - p0) >= beta ||
        abs(q[stride] - q0) >= beta)
        return;

    p1          = p[-stride];
    q1          = q[stride];
    sum         = p0 + q0 + 2;          /* shared term incl. rounding offset */
    inner_alpha = (alpha >> 2) + 2;

    if (abs(p[-2 * stride] - p0) < beta && abs(p0 - q0) < inner_alpha)
        p[0] = (p1 + p0 + sum) >> 2;
    else
        p[0] = (2 * p1 + sum) >> 2;

    if (abs(q[2 * stride] - q0) < beta && abs(q0 - p0) < inner_alpha)
        q[0] = (q1 + q0 + sum) >> 2;
    else
        q[0] = (2 * q1 + sum) >> 2;
}
95
+
96
/**
 * Chroma edge filter used by the cavs_filter_c*_c functions when the
 * boundary-strength flag is non-zero but bs1 != 2.  Only the two samples
 * adjacent to the edge are modified, by at most tc in either direction.
 *
 * @param p0_p   address of the first sample on the Q side of the edge
 * @param stride distance between neighbouring samples across the edge
 * @param alpha  threshold on the step |p0 - q0| across the edge
 * @param beta   threshold on the sample differences on each side
 * @param tc     clipping limit for the applied correction
 */
static inline void loop_filter_c1(uint8_t *p0_p, int stride, int alpha, int beta,
                                  int tc)
{
    uint8_t *const p = p0_p - stride;   /* p[0] is the P-side sample next to the edge */
    uint8_t *const q = p0_p;            /* q[0] is the Q-side sample next to the edge */
    const int p0 = p[0];
    const int q0 = q[0];
    const int p1 = p[-stride];
    const int q1 = q[stride];
    int step;

    if (abs(p0 - q0) >= alpha || abs(p1 - p0) >= beta || abs(q1 - q0) >= beta)
        return;

    step = clip(((q0 - p0) * 3 + p1 - q1 + 4) >> 3, -tc, tc);
    p[0] = clip_uint8(p0 + step);
    q[0] = clip_uint8(q0 - step);
}
104
+
105
+#undef P0
106
+#undef P1
107
+#undef P2
108
+#undef Q0
109
+#undef Q1
110
+#undef Q2
111
+
112
/**
 * Deblock 16 rows of luma across one edge; each row is filtered along
 * the horizontal direction (sample step 1), rows are `stride` apart.
 *
 * @param d      first sample on the Q side of the edge in row 0
 * @param stride distance between consecutive rows
 * @param alpha  edge-step threshold passed to the line filters
 * @param beta   side-activity threshold passed to the line filters
 * @param tc     clipping limit (used by loop_filter_l1 only)
 * @param bs1    strength flag for rows 0..7; the value 2 selects
 *               loop_filter_l2 for all 16 rows, regardless of bs2
 * @param bs2    strength flag for rows 8..15 (only read when bs1 != 2)
 */
void cavs_filter_lv_c(uint8_t *d, int stride, int alpha, int beta, int tc,
                      int bs1, int bs2)
{
    int row;

    if (bs1 == 2) {
        for (row = 0; row < 16; row++)
            loop_filter_l2(d + row * stride, 1, alpha, beta);
        return;
    }

    for (row = 0; row < 16; row++) {
        /* the upper and lower halves carry independent strength flags */
        if (row < 8 ? bs1 : bs2)
            loop_filter_l1(d + row * stride, 1, alpha, beta, tc);
    }
}
127
+
128
/**
 * Deblock 16 columns of luma across one edge; each column is filtered
 * along the vertical direction (sample step `stride`), columns are
 * one sample apart.
 *
 * @param d      first sample on the Q side of the edge in column 0
 * @param stride distance between vertically adjacent samples
 * @param alpha  edge-step threshold passed to the line filters
 * @param beta   side-activity threshold passed to the line filters
 * @param tc     clipping limit (used by loop_filter_l1 only)
 * @param bs1    strength flag for columns 0..7; the value 2 selects
 *               loop_filter_l2 for all 16 columns, regardless of bs2
 * @param bs2    strength flag for columns 8..15 (only read when bs1 != 2)
 */
void cavs_filter_lh_c(uint8_t *d, int stride, int alpha, int beta, int tc,
                      int bs1, int bs2)
{
    int col;

    if (bs1 == 2) {
        for (col = 0; col < 16; col++)
            loop_filter_l2(d + col, stride, alpha, beta);
        return;
    }

    for (col = 0; col < 16; col++) {
        /* the left and right halves carry independent strength flags */
        if (col < 8 ? bs1 : bs2)
            loop_filter_l1(d + col, stride, alpha, beta, tc);
    }
}
143
+
144
/**
 * Deblock 8 rows of chroma across one edge; each row is filtered along
 * the horizontal direction (sample step 1), rows are `stride` apart.
 *
 * @param d      first sample on the Q side of the edge in row 0
 * @param stride distance between consecutive rows
 * @param alpha  edge-step threshold passed to the line filters
 * @param beta   side-activity threshold passed to the line filters
 * @param tc     clipping limit (used by loop_filter_c1 only)
 * @param bs1    strength flag for rows 0..3; the value 2 selects
 *               loop_filter_c2 for all 8 rows, regardless of bs2
 * @param bs2    strength flag for rows 4..7 (only read when bs1 != 2)
 */
void cavs_filter_cv_c(uint8_t *d, int stride, int alpha, int beta, int tc,
                      int bs1, int bs2)
{
    int row;

    if (bs1 == 2) {
        for (row = 0; row < 8; row++)
            loop_filter_c2(d + row * stride, 1, alpha, beta);
        return;
    }

    for (row = 0; row < 8; row++) {
        /* the upper and lower halves carry independent strength flags */
        if (row < 4 ? bs1 : bs2)
            loop_filter_c1(d + row * stride, 1, alpha, beta, tc);
    }
}
159
+
160
/**
 * Deblock 8 columns of chroma across one edge; each column is filtered
 * along the vertical direction (sample step `stride`), columns are
 * one sample apart.
 *
 * @param d      first sample on the Q side of the edge in column 0
 * @param stride distance between vertically adjacent samples
 * @param alpha  edge-step threshold passed to the line filters
 * @param beta   side-activity threshold passed to the line filters
 * @param tc     clipping limit (used by loop_filter_c1 only)
 * @param bs1    strength flag for columns 0..3; the value 2 selects
 *               loop_filter_c2 for all 8 columns, regardless of bs2
 * @param bs2    strength flag for columns 4..7 (only read when bs1 != 2)
 */
void cavs_filter_ch_c(uint8_t *d, int stride, int alpha, int beta, int tc,
                      int bs1, int bs2)
{
    int col;

    if (bs1 == 2) {
        for (col = 0; col < 8; col++)
            loop_filter_c2(d + col, stride, alpha, beta);
        return;
    }

    for (col = 0; col < 8; col++) {
        /* the left and right halves carry independent strength flags */
        if (col < 4 ? bs1 : bs2)
            loop_filter_c1(d + col, stride, alpha, beta, tc);
    }
}
175
+
176
+/*****************************************************************************
177
+ *
178
+ * inverse transform
179
+ *
180
+ ****************************************************************************/
181
+
182
+void cavs_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride) {
183
+    int i;
184
+    DCTELEM (*src)[8] = (DCTELEM(*)[8])block;
185
+    uint8_t *cm = cropTbl + MAX_NEG_CROP;
186
+
187
+    for( i = 0; i < 8; i++ ) {
188
+        const int a0 =  3*src[i][1] - (src[i][7]<<1);
189
+        const int a1 =  3*src[i][3] + (src[i][5]<<1);
190
+        const int a2 =  (src[i][3]<<1) - 3*src[i][5];
191
+        const int a3 =  (src[i][1]<<1) + 3*src[i][7];
192
+
193
+        const int b4 = ((a0 + a1 + a3)<<1) + a1;
194
+        const int b5 = ((a0 - a1 + a2)<<1) + a0;
195
+        const int b6 = ((a3 - a2 - a1)<<1) + a3;
196
+        const int b7 = ((a0 - a2 - a3)<<1) - a2;
197
+
198
+        const int a7 = (src[i][2]<<2) - 10*src[i][6];
199
+        const int a6 = (src[i][6]<<2) + 10*src[i][2];
200
+        const int a5 = (src[i][0] - src[i][4]) << 3;
201
+        const int a4 = (src[i][0] + src[i][4]) << 3;
202
+
203
+        const int b0 = a4 + a6;
204
+        const int b1 = a5 + a7;
205
+        const int b2 = a5 - a7;
206
+        const int b3 = a4 - a6;
207
+
208
+        src[i][0] = (b0 + b4 + 4) >> 3;
209
+        src[i][1] = (b1 + b5 + 4) >> 3;
210
+        src[i][2] = (b2 + b6 + 4) >> 3;
211
+        src[i][3] = (b3 + b7 + 4) >> 3;
212
+        src[i][4] = (b3 - b7 + 4) >> 3;
213
+        src[i][5] = (b2 - b6 + 4) >> 3;
214
+        src[i][6] = (b1 - b5 + 4) >> 3;
215
+        src[i][7] = (b0 - b4 + 4) >> 3;
216
+    }
217
+    for( i = 0; i < 8; i++ ) {
218
+        const int a0 =  3*src[1][i] - (src[7][i]<<1);
219
+        const int a1 =  3*src[3][i] + (src[5][i]<<1);
220
+        const int a2 =  (src[3][i]<<1) - 3*src[5][i];
221
+        const int a3 =  (src[1][i]<<1) + 3*src[7][i];
222
+
223
+        const int b4 = ((a0 + a1 + a3)<<1) + a1;
224
+        const int b5 = ((a0 - a1 + a2)<<1) + a0;
225
+        const int b6 = ((a3 - a2 - a1)<<1) + a3;
226
+        const int b7 = ((a0 - a2 - a3)<<1) - a2;
227
+
228
+        const int a7 = (src[2][i]<<2) - 10*src[6][i];
229
+        const int a6 = (src[6][i]<<2) + 10*src[2][i];
230
+        const int a5 = (src[0][i] - src[4][i]) << 3;
231
+        const int a4 = (src[0][i] + src[4][i]) << 3;
232
+
233
+        const int b0 = a4 + a6;
234
+        const int b1 = a5 + a7;
235
+        const int b2 = a5 - a7;
236
+        const int b3 = a4 - a6;
237
+
238
+        dst[i + 0*stride] = cm[ dst[i + 0*stride] + ((b0 + b4 + 64) >> 7)];
239
+        dst[i + 1*stride] = cm[ dst[i + 1*stride] + ((b1 + b5 + 64) >> 7)];
240
+        dst[i + 2*stride] = cm[ dst[i + 2*stride] + ((b2 + b6 + 64) >> 7)];
241
+        dst[i + 3*stride] = cm[ dst[i + 3*stride] + ((b3 + b7 + 64) >> 7)];
242
+        dst[i + 4*stride] = cm[ dst[i + 4*stride] + ((b3 - b7 + 64) >> 7)];
243
+        dst[i + 5*stride] = cm[ dst[i + 5*stride] + ((b2 - b6 + 64) >> 7)];
244
+        dst[i + 6*stride] = cm[ dst[i + 6*stride] + ((b1 - b5 + 64) >> 7)];
245
+        dst[i + 7*stride] = cm[ dst[i + 7*stride] + ((b0 - b4 + 64) >> 7)];
246
+    }
247
+}
248
+
249
+/*****************************************************************************
250
+ *
251
+ * motion compensation
252
+ *
253
+ ****************************************************************************/
254
+
255
/*
 * Generates the one-dimensional 6-tap interpolation functions
 *   <OPNAME>cavs_filt{8,16}_{h,v}_<NAME>
 * A..F are the tap weights applied to the samples at offsets -2..+3
 * relative to the output position; OP(dest, sum) stores the weighted sum
 * (the OP macros are expected to do the rounding/scaling and use `cm`).
 * The 16x16 variants are assembled from four 8x8 calls.
 */
#define CAVS_SUBPIX(OPNAME, OP, NAME, A, B, C, D, E, F) \
static void OPNAME ## cavs_filt8_h_ ## NAME(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int row, col;\
    for(row=0; row<8; row++){\
        for(col=0; col<8; col++)\
            OP(dst[col], A*src[col-2] + B*src[col-1] + C*src[col] + D*src[col+1] + E*src[col+2] + F*src[col+3]);\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## cavs_filt8_v_ ## NAME(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int col, k;\
    for(col=0; col<8; col++){\
        int s[13]; /* source rows -2..10 of this column, read before any store */\
        for(k=0; k<13; k++)\
            s[k] = src[col + (k-2)*srcStride];\
        for(k=0; k<8; k++)\
            OP(dst[col + k*dstStride], A*s[k] + B*s[k+1] + C*s[k+2] + D*s[k+3] + E*s[k+4] + F*s[k+5]);\
    }\
}\
\
static void OPNAME ## cavs_filt16_v_ ## NAME(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    int half;\
    for(half=0; half<2; half++){ /* upper 16x8, then lower 16x8 */\
        uint8_t *d = dst + 8*half*dstStride;\
        uint8_t *s = src + 8*half*srcStride;\
        OPNAME ## cavs_filt8_v_ ## NAME(d  , s  , dstStride, srcStride);\
        OPNAME ## cavs_filt8_v_ ## NAME(d+8, s+8, dstStride, srcStride);\
    }\
}\
\
static void OPNAME ## cavs_filt16_h_ ## NAME(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    int half;\
    for(half=0; half<2; half++){ /* upper 16x8, then lower 16x8 */\
        uint8_t *d = dst + 8*half*dstStride;\
        uint8_t *s = src + 8*half*srcStride;\
        OPNAME ## cavs_filt8_h_ ## NAME(d  , s  , dstStride, srcStride);\
        OPNAME ## cavs_filt8_h_ ## NAME(d+8, s+8, dstStride, srcStride);\
    }\
}
324
+
325
/*
 * Generates the two-dimensional interpolation functions
 *   <OPNAME>cavs_filt{8,16}_hv_<NAME>
 * Pass 1 filters 8+5 source rows horizontally with taps AH..FH (offsets
 * -2..+3) into a 16-bit scratch buffer; pass 2 filters the scratch
 * columns vertically with taps AV..FV.  When FULL is non-zero,
 * 64 * src2[...] is added to every vertical sum before OP stores it;
 * when FULL is zero src2 is never dereferenced (callers may pass NULL).
 *
 * NOTE(review): the scratch buffer is int16_t; with tap sets such as
 * (-1,-2,96,42,-7,0) the horizontal sum can reach 138*255 = 35190, which
 * does not fit - confirm whether the resulting wrap-around is intended.
 */
#define CAVS_SUBPIX_HV(OPNAME, OP, NAME, AH, BH, CH, DH, EH, FH, AV, BV, CV, DV, EV, FV, FULL) \
static void OPNAME ## cavs_filt8_hv_ ## NAME(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int srcStride){\
    int16_t temp[8*(8+5)];\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int row, col, k;\
    /* pass 1: horizontal filter over rows -2..10 */\
    src1 -= 2*srcStride;\
    for(row=0; row<8+5; row++){\
        for(col=0; col<8; col++)\
            temp[8*row + col] = AH*src1[col-2] + BH*src1[col-1] + CH*src1[col] + DH*src1[col+1] + EH*src1[col+2] + FH*src1[col+3];\
        src1+=srcStride;\
    }\
    /* pass 2: vertical filter down each scratch column */\
    for(col=0; col<8; col++){\
        const int16_t *t = temp + col;\
        for(k=0; k<8; k++){\
            int sum = AV*t[8*k] + BV*t[8*(k+1)] + CV*t[8*(k+2)] + DV*t[8*(k+3)] + EV*t[8*(k+4)] + FV*t[8*(k+5)];\
            if(FULL)\
                sum += 64*src2[col + k*srcStride];\
            OP(dst[col + k*dstStride], sum);\
        }\
    }\
}\
\
static void OPNAME ## cavs_filt16_hv_ ## NAME(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int srcStride){\
    int half;\
    for(half=0; half<2; half++){ /* upper 16x8, then lower 16x8 */\
        uint8_t *d  = dst  + 8*half*dstStride;\
        uint8_t *s1 = src1 + 8*half*srcStride;\
        uint8_t *s2 = src2 + 8*half*srcStride;\
        OPNAME ## cavs_filt8_hv_ ## NAME(d  , s1,   s2  , dstStride, srcStride);\
        OPNAME ## cavs_filt8_hv_ ## NAME(d+8, s1+8, s2+8, dstStride, srcStride);\
    }\
}
416
+
417
/*
 * Generates the 16 public entry points
 *   <OPNAME>cavs_qpel<SIZE>_mcXY_c(dst, src, stride)
 * As the calls below show, X selects the horizontal filter (0 none,
 * 1 qpel_l, 2 hpel, 3 qpel_r) and Y the vertical one.  Positions with
 * both X and Y non-zero go through the two-dimensional hv filters; the
 * extra source pointer handed to those is only read by the "egpr"
 * variant (the others are instantiated with FULL == 0).
 */
#define CAVS_MC(OPNAME, SIZE) \
/* X0 column: no vertical filtering */\
void OPNAME ## cavs_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
}\
void OPNAME ## cavs_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## cavs_filt ## SIZE ## _h_qpel_l(dst, src, stride, stride);\
}\
void OPNAME ## cavs_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## cavs_filt ## SIZE ## _h_hpel(dst, src, stride, stride);\
}\
void OPNAME ## cavs_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## cavs_filt ## SIZE ## _h_qpel_r(dst, src, stride, stride);\
}\
\
/* X1 row */\
void OPNAME ## cavs_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## cavs_filt ## SIZE ## _v_qpel_l(dst, src, stride, stride);\
}\
void OPNAME ## cavs_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## cavs_filt ## SIZE ## _hv_egpr(dst, src, src, stride, stride);\
}\
void OPNAME ## cavs_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## cavs_filt ## SIZE ## _hv_ff(dst, src, src+stride+1, stride, stride);\
}\
void OPNAME ## cavs_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## cavs_filt ## SIZE ## _hv_egpr(dst, src, src+1, stride, stride);\
}\
\
/* X2 row */\
void OPNAME ## cavs_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## cavs_filt ## SIZE ## _v_hpel(dst, src, stride, stride);\
}\
void OPNAME ## cavs_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## cavs_filt ## SIZE ## _hv_ii(dst, src, src+stride+1, stride, stride);\
}\
void OPNAME ## cavs_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## cavs_filt ## SIZE ## _hv_jj(dst, src, NULL, stride, stride);\
}\
void OPNAME ## cavs_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## cavs_filt ## SIZE ## _hv_kk(dst, src, src+stride+1, stride, stride);\
}\
\
/* X3 row */\
void OPNAME ## cavs_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## cavs_filt ## SIZE ## _v_qpel_r(dst, src, stride, stride);\
}\
void OPNAME ## cavs_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## cavs_filt ## SIZE ## _hv_egpr(dst, src, src+stride, stride, stride);\
}\
void OPNAME ## cavs_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## cavs_filt ## SIZE ## _hv_qq(dst, src, src+stride+1, stride, stride);\
}\
void OPNAME ## cavs_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## cavs_filt ## SIZE ## _hv_egpr(dst, src, src+stride+1, stride, stride);\
}
480
+
481
+#define op_put1(a, b)  a = cm[((b)+4)>>3]
482
+#define op_put2(a, b)  a = cm[((b)+64)>>7]
483
+#define op_put3(a, b)  a = cm[((b)+32)>>6]
484
+#define op_put4(a, b)  a = cm[((b)+512)>>10]
485
+#define op_avg1(a, b)  a = ((a)+cm[((b)+4)>>3]   +1)>>1
486
+#define op_avg2(a, b)  a = ((a)+cm[((b)+64)>>7]  +1)>>1
487
+#define op_avg3(a, b)  a = ((a)+cm[((b)+32)>>6]  +1)>>1
488
+#define op_avg4(a, b)  a = ((a)+cm[((b)+512)>>10]+1)>>1
489
+CAVS_SUBPIX(put_   , op_put1, hpel,    0, -1,  5,  5, -1,  0)
490
+CAVS_SUBPIX(put_   , op_put2, qpel_l, -1, -2, 96, 42, -7,  0)
491
+CAVS_SUBPIX(put_   , op_put2, qpel_r,  0, -7, 42, 96, -2, -1)
492
+CAVS_SUBPIX_HV(put_, op_put3, jj,      0, -1,  5,  5, -1,  0,  0, -1,  5,  5, -1, 0, 0)
493
+CAVS_SUBPIX_HV(put_, op_put4, ff,      0, -1,  5,  5, -1,  0, -1, -2, 96, 42, -7, 0, 0)
494
+CAVS_SUBPIX_HV(put_, op_put4, ii,     -1, -2, 96, 42, -7,  0,  0, -1,  5,  5, -1, 0, 0)
495
+CAVS_SUBPIX_HV(put_, op_put4, kk,      0, -7, 42, 96, -2, -1,  0, -1,  5,  5, -1, 0, 0)
496
+CAVS_SUBPIX_HV(put_, op_put4, qq,      0, -1,  5,  5, -1,  0,  0, -7, 42, 96, -2,-1, 0)
497
+CAVS_SUBPIX_HV(put_, op_put2, egpr,    0, -1,  5,  5, -1,  0,  0, -1,  5,  5, -1, 0, 1)
498
+CAVS_SUBPIX(avg_   , op_avg1, hpel,    0, -1,  5,  5, -1,  0)
499
+CAVS_SUBPIX(avg_   , op_avg2, qpel_l, -1, -2, 96, 42, -7,  0)
500
+CAVS_SUBPIX(avg_   , op_avg2, qpel_r,  0, -7, 42, 96, -2, -1)
501
+CAVS_SUBPIX_HV(avg_, op_avg3, jj,      0, -1,  5,  5, -1,  0,  0, -1,  5,  5, -1, 0, 0)
502
+CAVS_SUBPIX_HV(avg_, op_avg4, ff,      0, -1,  5,  5, -1,  0, -1, -2, 96, 42, -7, 0, 0)
503
+CAVS_SUBPIX_HV(avg_, op_avg4, ii,     -1, -2, 96, 42, -7,  0,  0, -1,  5,  5, -1, 0, 0)
504
+CAVS_SUBPIX_HV(avg_, op_avg4, kk,      0, -7, 42, 96, -2, -1,  0, -1,  5,  5, -1, 0, 0)
505
+CAVS_SUBPIX_HV(avg_, op_avg4, qq,      0, -1,  5,  5, -1,  0,  0, -7, 42, 96, -2,-1, 0)
506
+CAVS_SUBPIX_HV(avg_, op_avg2, egpr,    0, -1,  5,  5, -1,  0,  0, -1,  5,  5, -1, 0, 1)
507
+CAVS_MC(put_, 8)
508
+CAVS_MC(put_, 16)
509
+CAVS_MC(avg_, 8)
510
+CAVS_MC(avg_, 16)
0 511
new file mode 100644
... ...
@@ -0,0 +1,95 @@
0
+/*
1
+ * Chinese AVS video (AVS1-P2, JiZhun profile) decoder.
2
+ * Copyright (c) 2006  Stefan Gehrer <stefan.gehrer@gmx.de>
3
+ *
4
+ * DSP function prototypes
5
+ *
6
+ * This library is free software; you can redistribute it and/or
7
+ * modify it under the terms of the GNU Lesser General Public
8
+ * License as published by the Free Software Foundation; either
9
+ * version 2 of the License, or (at your option) any later version.
10
+ *
11
+ * This library is distributed in the hope that it will be useful,
12
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
+ * Lesser General Public License for more details.
15
+ *
16
+ * You should have received a copy of the GNU Lesser General Public
17
+ * License along with this library; if not, write to the Free Software
18
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19
+ */
20
+
21
+void put_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride);
22
+void put_cavs_qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride);
23
+void put_cavs_qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride);
24
+void put_cavs_qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride);
25
+void put_cavs_qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride);
26
+void put_cavs_qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride);
27
+void put_cavs_qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride);
28
+void put_cavs_qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride);
29
+void put_cavs_qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride);
30
+void put_cavs_qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride);
31
+void put_cavs_qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride);
32
+void put_cavs_qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride);
33
+void put_cavs_qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride);
34
+void put_cavs_qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride);
35
+void put_cavs_qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride);
36
+void put_cavs_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride);
37
+void put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride);
38
+void put_cavs_qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride);
39
+void put_cavs_qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride);
40
+void put_cavs_qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride);
41
+void put_cavs_qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride);
42
+void put_cavs_qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride);
43
+void put_cavs_qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride);
44
+void put_cavs_qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride);
45
+void put_cavs_qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride);
46
+void put_cavs_qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride);
47
+void put_cavs_qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride);
48
+void put_cavs_qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride);
49
+void put_cavs_qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride);
50
+void put_cavs_qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride);
51
+void put_cavs_qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride);
52
+void put_cavs_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride);
53
+void avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride);
54
+void avg_cavs_qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride);
55
+void avg_cavs_qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride);
56
+void avg_cavs_qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride);
57
+void avg_cavs_qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride);
58
+void avg_cavs_qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride);
59
+void avg_cavs_qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride);
60
+void avg_cavs_qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride);
61
+void avg_cavs_qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride);
62
+void avg_cavs_qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride);
63
+void avg_cavs_qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride);
64
+void avg_cavs_qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride);
65
+void avg_cavs_qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride);
66
+void avg_cavs_qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride);
67
+void avg_cavs_qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride);
68
+void avg_cavs_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride);
69
+void avg_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride);
70
+void avg_cavs_qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride);
71
+void avg_cavs_qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride);
72
+void avg_cavs_qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride);
73
+void avg_cavs_qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride);
74
+void avg_cavs_qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride);
75
+void avg_cavs_qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride);
76
+void avg_cavs_qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride);
77
+void avg_cavs_qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride);
78
+void avg_cavs_qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride);
79
+void avg_cavs_qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride);
80
+void avg_cavs_qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride);
81
+void avg_cavs_qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride);
82
+void avg_cavs_qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride);
83
+void avg_cavs_qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride);
84
+void avg_cavs_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride);
85
+void cavs_filter_lv_c(uint8_t *d, int stride, int alpha, int beta, int tc, int bs1, int bs2);
86
+void cavs_filter_lh_c(uint8_t *d, int stride, int alpha, int beta, int tc, int bs1, int bs2);
87
+void cavs_filter_cv_c(uint8_t *d, int stride, int alpha, int beta, int tc, int bs1, int bs2);
88
+void cavs_filter_ch_c(uint8_t *d, int stride, int alpha, int beta, int tc, int bs1, int bs2);
89
+void cavs_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride);
90
+
91
+void put_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h);
92
+void put_pixels16_c(uint8_t *block, const uint8_t *pixels, int line_size, int h);
93
+void avg_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h);
94
+void avg_pixels16_c(uint8_t *block, const uint8_t *pixels, int line_size, int h);