libavcodec/snow.c
791e7b83
 /*
  * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
  *
b78e7197
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
791e7b83
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
b78e7197
  * version 2.1 of the License, or (at your option) any later version.
791e7b83
  *
b78e7197
  * FFmpeg is distributed in the hope that it will be useful,
791e7b83
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
b78e7197
  * License along with FFmpeg; if not, write to the Free Software
5509bffa
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
791e7b83
  */
 
94ca624f
 #include "libavutil/intmath.h"
0cc06b9e
 #include "libavutil/log.h"
 #include "libavutil/opt.h"
791e7b83
 #include "avcodec.h"
3a2d1465
 #include "me_cmp.h"
30981a96
 #include "snow_dwt.h"
874c5b02
 #include "internal.h"
059715a4
 #include "snow.h"
7f1b4270
 #include "snowdata.h"
28869757
 
 #include "rangecoder.h"
199436b9
 #include "mathops.h"
c26e58e3
 #include "h263.h"
791e7b83
 
6b2f6646
 
7f1b4270
 /**
  * Combine four prediction blocks with per-pixel OBMC weights and apply the
  * result to one block of the slice buffer.
  *
  * For every pixel the four weights are taken from four sub-windows of the
  * obmc table (offsets 0, obmc_stride/2, obmc_stride*(obmc_stride/2) and the
  * sum of the latter two) and paired with block[3], block[2], block[1] and
  * block[0] respectively.
  *
  * @param add  non-zero: add the prediction to the slice-buffer line, round,
  *             clip to 0..255 and store into dst8;
  *             zero: subtract the prediction from the slice-buffer line.
  */
 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
                               int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
     const int half_stride = obmc_stride >> 1;
     int row, col;

     for (row = 0; row < b_h; row++) {
         //FIXME ugly misuse of obmc_stride
         const uint8_t *win_a = obmc  + row * obmc_stride;
         const uint8_t *win_b = win_a + half_stride;
         const uint8_t *win_c = win_a + obmc_stride * half_stride;
         const uint8_t *win_d = win_c + half_stride;
         const int pix_row    = row * src_stride;
         IDWTELEM *line       = slice_buffer_get_line(sb, src_y + row);

         for (col = 0; col < b_w; col++) {
             const int pix = col + pix_row;
             int acc = win_a[col] * block[3][pix]
                     + win_b[col] * block[2][pix]
                     + win_c[col] * block[1][pix]
                     + win_d[col] * block[0][pix];

             /* Bring the weighted sum to FRAC_BITS fractional bits. */
             acc <<= 8 - LOG2_OBMC_MAX;
             if (FRAC_BITS != 8)
                 acc >>= 8 - FRAC_BITS;

             if (!add) {
                 line[col + src_x] -= acc;
                 continue;
             }

             acc += line[col + src_x];
             acc  = (acc + (1 << (FRAC_BITS - 1))) >> FRAC_BITS;
             /* Clip to 0..255: negative values become 0, large ones 255. */
             if (acc & (~255))
                 acc = ~(acc >> 31);
             dst8[pix] = acc;
         }
     }
 }
 
b96c9513
 /**
  * Obtain a reference-counted buffer for frame with the codec dimensions.
  *
  * When the codec is an encoder the buffer is requested 2*EDGE_WIDTH larger
  * in both directions, every data pointer is then advanced past the top/left
  * edge (scaled by the chroma shifts for planes other than the first), and
  * the frame dimensions are set back to the codec dimensions.
  *
  * @return 0 on success, the negative ff_get_buffer() error otherwise
  */
 int ff_snow_get_buffer(SnowContext *s, AVFrame *frame)
 {
     const int want_edges = av_codec_is_encoder(s->avctx->codec);
     const int padding    = want_edges ? 2 * EDGE_WIDTH : 0;
     int plane, err;

     frame->width  = s->avctx->width  + padding;
     frame->height = s->avctx->height + padding;

     err = ff_get_buffer(s->avctx, frame, AV_GET_BUFFER_FLAG_REF);
     if (err < 0)
         return err;

     if (!want_edges)
         return 0;

     for (plane = 0; frame->data[plane]; plane++) {
         const int v_shift = plane ? s->chroma_v_shift : 0;
         const int h_shift = plane ? s->chroma_h_shift : 0;
         int offset = (EDGE_WIDTH >> v_shift) * frame->linesize[plane] +
                      (EDGE_WIDTH >> h_shift);

         frame->data[plane] += offset;
     }
     frame->width  = s->avctx->width;
     frame->height = s->avctx->height;

     return 0;
 }
 
7f1b4270
 /**
  * Set every range-coder state table (all subband states of the first three
  * planes, the header state and the block state) to MID_STATE.
  */
 void ff_snow_reset_contexts(SnowContext *s){ //FIXME better initial contexts
     int p, lvl, orient;

     for (p = 0; p < 3; p++) {
         for (lvl = 0; lvl < MAX_DECOMPOSITIONS; lvl++) {
             /* Orientation 0 is only visited at level 0. */
             for (orient = lvl ? 1 : 0; orient < 4; orient++) {
                 SubBand *band = &s->plane[p].band[lvl][orient];

                 memset(band->state, MID_STATE, sizeof(band->state));
             }
         }
     }

     memset(s->block_state,  MID_STATE, sizeof(s->block_state));
     memset(s->header_state, MID_STATE, sizeof(s->header_state));
 }
 
7f1b4270
 /**
  * (Re)allocate the zero-initialised block array for the current dimensions.
  *
  * The picture size is rounded up to whole blocks of 1<<LOG2_MB_SIZE pixels,
  * stored in b_width/b_height, and each block gets room for
  * 1 << (2*block_max_depth) BlockNode entries.
  *
  * @return 0 on success, AVERROR(ENOMEM) if the allocation fails
  */
 int ff_snow_alloc_blocks(SnowContext *s){
     const int blocks_wide = AV_CEIL_RSHIFT(s->avctx->width,  LOG2_MB_SIZE);
     const int blocks_high = AV_CEIL_RSHIFT(s->avctx->height, LOG2_MB_SIZE);

     s->b_width  = blocks_wide;
     s->b_height = blocks_high;

     av_free(s->block);
     s->block = av_mallocz_array(blocks_wide * blocks_high,
                                 sizeof(BlockNode) << (s->block_max_depth*2));

     return s->block ? 0 : AVERROR(ENOMEM);
 }
 
8cdf95d9
 static av_cold void init_qexp(void){
155ec6ed
     int i;
7f1b4270
     double v=128;
155ec6ed
 
7f1b4270
     for(i=0; i<QROOT; i++){
35e02a3d
         ff_qexp[i]= lrintf(v);
7f1b4270
         v *= pow(2, 1.0 / QROOT);
155ec6ed
     }
791e7b83
 }
5262f7ed
 /**
  * Sub-pixel motion compensation of one b_w x b_h block.
  *
  * Up to three filtered planes are built from src (horizontally filtered,
  * vertically filtered, and filtered in both directions); dst is then written
  * as a weighted blend of two of the candidate planes, or as a bilinear blend
  * of four of them.
  *
  * @param p      plane supplying hcoeff, fast_mc and diag_mc; may be NULL, in
  *               which case the fixed (20,-5,1) taps are used and the diag_mc
  *               override below is skipped
  * @param dst    output block; rows are stride bytes apart
  * @param src    source pixels. The function itself advances src by
  *               HTAPS_MAX/2-1 columns and rows before using it as the
  *               full-pel plane, so the caller passes a pointer that far
  *               up-left of the block
  * @param stride line size shared by src and dst
  * @param b_w    block width
  * @param b_h    block height
  * @param dx     horizontal fractional position, asserted to be below 16
  * @param dy     vertical fractional position, asserted to be below 16
  *
  * NOTE(review): the temporaries use 64-element rows and 32+HTAPS_MAX rows, so
  * this presumably requires b_w < 64 and b_h <= 32 -- confirm against callers.
  */
 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int b_w, int b_h, int dx, int dy){
7942269b
     /* Weight of the first of the two blended planes, indexed by
      * (dx&7) + 8*(dy&7); the second plane gets 8 minus this value. */
     static const uint8_t weight[64]={
a68ca08e
     8,7,6,5,4,3,2,1,
     7,7,0,0,0,0,0,1,
     6,0,6,0,0,0,2,0,
     5,0,0,5,0,3,0,0,
     4,0,0,0,4,0,0,0,
     3,0,0,5,0,3,0,0,
     2,0,6,0,0,0,2,0,
     1,7,0,0,0,0,0,1,
     };
 
7942269b
     /* Indexed by dx + 16*dy. High nibble and low nibble are the two hpel[]
      * indices to blend. An entry of 0xcc yields index 12 for both, and
      * needs[12] is 15, which selects the four-plane bilinear path. */
     static const uint8_t brane[256]={
a68ca08e
     0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
     0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
     0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
     0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
     0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
     0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
     0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
     0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
     0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
     0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
     0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
     0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
     0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
     0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
     0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
     0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
     };
 
7942269b
     /* Per hpel[] index: bitmask of the filter passes that must run for that
      * plane to be valid (1: horizontal, 2: vertical, 4: both). A non-zero
      * entry also means the plane lives in a 64-stride temporary (MC_STRIDE). */
     static const uint8_t needs[16]={
a68ca08e
     0,1,0,0,
     2,4,2,0,
     0,1,0,0,
     15
     };
 
     int x, y, b, r, l;
61d6e445
     /* Unrounded horizontal filter output, input of the b&4 pass. */
     int16_t tmpIt   [64*(32+HTAPS_MAX)];
58e57c30
     /* [0]: horizontal, [1]: vertical, [2]: horizontal+vertical; all clipped. */
     uint8_t tmp2t[3][64*(32+HTAPS_MAX)];
a68ca08e
     int16_t *tmpI= tmpIt;
     uint8_t *tmp2= tmp2t[0];
f0a70840
     const uint8_t *hpel[11];
be64d5f5
     av_assert2(dx<16 && dy<16);
a68ca08e
     r= brane[dx + 16*dy]&15;
     l= brane[dx + 16*dy]>>4;
 
     b= needs[l] | needs[r];
7d7f57d9
     /* Without diag_mc every pass runs and the bilinear path below is taken. */
     if(p && !p->diag_mc)
         b= 15;
a68ca08e
 
     /* Horizontal pass over b_h+HTAPS_MAX-1 rows (extra rows feed the b&4
      * pass): tmpI keeps the raw sum, tmp2t[0] the rounded, clipped value. */
     if(b&5){
61d6e445
         for(y=0; y < b_h+HTAPS_MAX-1; y++){
65dc0f53
             for(x=0; x < b_w; x++){
61d6e445
                 int a_1=src[x + HTAPS_MAX/2-4];
                 int a0= src[x + HTAPS_MAX/2-3];
                 int a1= src[x + HTAPS_MAX/2-2];
                 int a2= src[x + HTAPS_MAX/2-1];
                 int a3= src[x + HTAPS_MAX/2+0];
                 int a4= src[x + HTAPS_MAX/2+1];
                 int a5= src[x + HTAPS_MAX/2+2];
                 int a6= src[x + HTAPS_MAX/2+3];
7d7f57d9
                 int am=0;
                 if(!p || p->fast_mc){
                     am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
                     tmpI[x]= am;
                     am= (am+16)>>5;
                 }else{
                     am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
                     tmpI[x]= am;
                     am= (am+32)>>6;
                 }
791e7b83
 
65dc0f53
                 /* Clip to 0..255: negative -> 0, above 255 -> 255. */
                 if(am&(~255)) am= ~(am>>31);
                 tmp2[x]= am;
             }
             tmpI+= 64;
58e57c30
             tmp2+= 64;
65dc0f53
             src += stride;
791e7b83
         }
65dc0f53
         /* Rewind src to the row it had before the loop. */
         src -= stride*y;
a68ca08e
     }
61d6e445
     src += HTAPS_MAX/2 - 1;
a68ca08e
     tmp2= tmp2t[1];
115329f1
 
a68ca08e
     /* Vertical pass on the source; b_w+1 columns because hpel[6] reads this
      * plane shifted right by one. */
     if(b&2){
65dc0f53
         for(y=0; y < b_h; y++){
             for(x=0; x < b_w+1; x++){
61d6e445
                 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
                 int a0= src[x + (HTAPS_MAX/2-3)*stride];
                 int a1= src[x + (HTAPS_MAX/2-2)*stride];
                 int a2= src[x + (HTAPS_MAX/2-1)*stride];
                 int a3= src[x + (HTAPS_MAX/2+0)*stride];
                 int a4= src[x + (HTAPS_MAX/2+1)*stride];
                 int a5= src[x + (HTAPS_MAX/2+2)*stride];
                 int a6= src[x + (HTAPS_MAX/2+3)*stride];
7d7f57d9
                 int am=0;
                 if(!p || p->fast_mc)
                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
                 else
                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
791e7b83
 
65dc0f53
                 if(am&(~255)) am= ~(am>>31);
                 tmp2[x]= am;
             }
             src += stride;
58e57c30
             tmp2+= 64;
a68ca08e
         }
65dc0f53
         src -= stride*y;
a68ca08e
     }
61d6e445
     src += stride*(HTAPS_MAX/2 - 1);
a68ca08e
     tmp2= tmp2t[2];
     tmpI= tmpIt;
     /* Vertical pass over the unrounded horizontal output; the larger
      * rounding/shift removes the scale of both filter stages at once. */
     if(b&4){
         for(y=0; y < b_h; y++){
             for(x=0; x < b_w; x++){
61d6e445
                 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
                 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
                 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
                 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
                 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
                 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
                 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
                 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
7d7f57d9
                 int am=0;
                 if(!p || p->fast_mc)
                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
                 else
                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
a68ca08e
                 if(am&(~255)) am= ~(am>>31);
                 tmp2[x]= am;
             }
             tmpI+= 64;
58e57c30
             tmp2+= 64;
a68ca08e
         }
     }
115329f1
 
a68ca08e
     /* Candidate planes laid out as a grid with four slots per row; slots
      * 3 and 7 are never assigned. src now points at the block itself. */
     hpel[ 0]= src;
58e57c30
     hpel[ 1]= tmp2t[0] + 64*(HTAPS_MAX/2-1);
a68ca08e
     hpel[ 2]= src + 1;
 
     hpel[ 4]= tmp2t[1];
     hpel[ 5]= tmp2t[2];
     hpel[ 6]= tmp2t[1] + 1;
 
     hpel[ 8]= src + stride;
58e57c30
     hpel[ 9]= hpel[1] + 64;
a68ca08e
     hpel[10]= hpel[8] + 1;
 
58e57c30
     /* Planes held in the temporaries have 64-byte rows, the others use the
      * picture stride. */
 #define MC_STRIDE(x) (needs[x] ? 64 : stride)
 
a68ca08e
     if(b==15){
58e57c30
         /* Bilinear blend of the four planes surrounding (dx,dy). */
         int dxy = dx / 8 + dy / 8 * 4;
         const uint8_t *src1 = hpel[dxy    ];
         const uint8_t *src2 = hpel[dxy + 1];
         const uint8_t *src3 = hpel[dxy + 4];
         const uint8_t *src4 = hpel[dxy + 5];
         int stride1 = MC_STRIDE(dxy);
         int stride2 = MC_STRIDE(dxy + 1);
         int stride3 = MC_STRIDE(dxy + 4);
         int stride4 = MC_STRIDE(dxy + 5);
a68ca08e
         dx&=7;
         dy&=7;
         for(y=0; y < b_h; y++){
             for(x=0; x < b_w; x++){
                 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
                          (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
             }
58e57c30
             src1+=stride1;
             src2+=stride2;
             src3+=stride3;
             src4+=stride4;
a68ca08e
             dst +=stride;
         }
     }else{
f0a70840
         /* Two-plane blend with weights a and 8-a. Note that the local b
          * declared here shadows the pass bitmask of the same name. */
         const uint8_t *src1= hpel[l];
         const uint8_t *src2= hpel[r];
58e57c30
         int stride1 = MC_STRIDE(l);
         int stride2 = MC_STRIDE(r);
a68ca08e
         int a= weight[((dx&7) + (8*(dy&7)))];
         int b= 8-a;
         for(y=0; y < b_h; y++){
             for(x=0; x < b_w; x++){
                 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
             }
58e57c30
             src1+=stride1;
             src2+=stride2;
a68ca08e
             dst +=stride;
791e7b83
         }
     }
 }
 
3a7f9db1
 /**
  * Write the prediction of one block into dst.
  *
  * Intra blocks are filled with the block's colour for the given plane.
  * Inter blocks are motion compensated from last_picture[block->ref]: the
  * motion vector is scaled, the source area is routed through
  * emulated_edge_mc() (using tmp + MB_SIZE as scratch) when it lies too close
  * to the plane border, and the pixels are produced either by mc_block() or
  * by the h264 qpel functions.
  *
  * @param sx,sy  block position in the plane
  * @param w,h    plane dimensions used for the border test
  */
 void ff_snow_pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, ptrdiff_t stride, int sx, int sy, int b_w, int b_h, const BlockNode *block, int plane_index, int w, int h){
     if (block->type & BLOCK_INTRA) {
         const unsigned color  = block->color[plane_index];
         const unsigned color4 = color*0x01010101;
         int row, col;

         if (b_w == 32 || b_w == 16 || b_w == 8 || b_w == 4) {
             /* Widths that are handled with 32-bit stores, four pixels each. */
             for (row = 0; row < b_h; row++) {
                 for (col = 0; col < b_w; col += 4)
                     *(uint32_t*)&dst[col + row*stride] = color4;
             }
         } else {
             for (row = 0; row < b_h; row++) {
                 for (col = 0; col < b_w; col++)
                     dst[col + row*stride] = color;
             }
         }
         return;
     }

     {
         uint8_t *src = s->last_picture[block->ref]->data[plane_index];
         const int scale = plane_index ? (2*s->mv_scale)>>s->chroma_h_shift : 2*s->mv_scale;
         const int mx = block->mx*scale;
         const int my = block->my*scale;
         const int dx = mx&15;
         const int dy = my&15;
         const int tab_index = 3 - (b_w>>2) + (b_w>>4);
         const int qpel_index = dy + (dx>>2);
         int use_generic;

         sx  += (mx>>4) - (HTAPS_MAX/2-1);
         sy  += (my>>4) - (HTAPS_MAX/2-1);
         src += sx + sy*stride;

         /* The unsigned casts make negative coordinates fail the test too. */
         if (   (unsigned)sx >= FFMAX(w - b_w - (HTAPS_MAX-2), 0)
             || (unsigned)sy >= FFMAX(h - b_h - (HTAPS_MAX-2), 0)) {
             s->vdsp.emulated_edge_mc(tmp + MB_SIZE, src,
                                      stride, stride,
                                      b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1,
                                      sx, sy, w, h);
             src = tmp + MB_SIZE;
         }

         av_assert2(s->chroma_h_shift == s->chroma_v_shift); // only one mv_scale

         av_assert2((tab_index>=0 && tab_index<4) || b_w==32);

         use_generic =    (dx&3) || (dy&3)
                       || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h)
                       || (b_w&(b_w-1))
                       || b_w == 1
                       || b_h == 1
                       || !s->plane[plane_index].fast_mc;

         if (use_generic) {
             mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy);
         } else if (b_w == 32) {
             int y;

             /* Two 16-wide calls per group of 16 rows. */
             for (y = 0; y < b_h; y += 16) {
                 s->h264qpel.put_h264_qpel_pixels_tab[0][qpel_index](dst      + y*stride, src +  3 + (y+3)*stride, stride);
                 s->h264qpel.put_h264_qpel_pixels_tab[0][qpel_index](dst + 16 + y*stride, src + 19 + (y+3)*stride, stride);
             }
         } else if (b_w == b_h) {
             s->h264qpel.put_h264_qpel_pixels_tab[tab_index][qpel_index](dst, src + 3 + 3*stride, stride);
         } else if (b_w == 2*b_h) {
             /* Left and right square halves. */
             s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][qpel_index](dst,       src + 3       + 3*stride, stride);
             s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][qpel_index](dst + b_h, src + 3 + b_h + 3*stride, stride);
         } else {
             av_assert2(2*b_w==b_h);
             /* Upper and lower square halves. */
             s->h264qpel.put_h264_qpel_pixels_tab[tab_index][qpel_index](dst,              src + 3 + 3*stride,              stride);
             s->h264qpel.put_h264_qpel_pixels_tab[tab_index][qpel_index](dst + b_w*stride, src + 3 + 3*stride + b_w*stride, stride);
         }
     }
 }
 
7f1b4270
 /*
  * mca(dx,dy,b_w) defines a static function named by pasting
  * mc_block_hpel, dx, dy and b_w together. The generated function calls
  * mc_block() with a NULL plane for a b_w x b_w block at fractional position
  * (dx,dy), after moving src back by HTAPS_MAX/2-1 columns and rows; its h
  * argument is only checked against b_w by av_assert2().
  * The instantiations below cover dx,dy in {0,8} for block sizes 16 and 8.
  */
 #define mca(dx,dy,b_w)\
0ddca7d4
 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int h){\
be64d5f5
     av_assert2(h==b_w);\
7f1b4270
     mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
d773d855
 }
 
7f1b4270
 mca( 0, 0,16)
 mca( 8, 0,16)
 mca( 0, 8,16)
 mca( 8, 8,16)
 mca( 0, 0,8)
 mca( 8, 0,8)
 mca( 0, 8,8)
 mca( 8, 8,8)
d773d855
 
7f1b4270
 /**
  * Initialisation that does not depend on a parsed header.
  *
  * Stores avctx in the private context, sets default values, initialises the
  * DSP contexts, redirects the qpel/hpel function tables, builds the
  * ff_qexp and ff_scale_mv_ref tables, and allocates the DWT/IDWT work
  * buffers, the run buffer and the reference/current/mconly frames.
  *
  * @param avctx codec context whose priv_data is the SnowContext
  * @return 0 on success, AVERROR(ENOMEM) if any allocation fails.
  *         NOTE(review): nothing allocated here is released on the failure
  *         path; presumably the caller is expected to run
  *         ff_snow_common_end() -- confirm.
  */
 av_cold int ff_snow_common_init(AVCodecContext *avctx){
d773d855
     SnowContext *s = avctx->priv_data;
     int width, height;
5eb4af6c
     int i, j;
d773d855
 
     s->avctx= avctx;
d5ce725c
     s->max_ref_frames=1; //just make sure it's not an invalid value in case of no initial keyframe
dd369c9a
     s->spatial_decomposition_count = 1;
d773d855
 
3a2d1465
     ff_me_cmp_init(&s->mecc, avctx);
771ba8f2
     ff_hpeldsp_init(&s->hdsp, avctx->flags);
a41bf09d
     ff_videodsp_init(&s->vdsp, 8);
05aec7bb
     ff_dwt_init(&s->dwt);
fc13a896
     ff_h264qpel_init(&s->h264qpel, 8);
d773d855
 
     /* mcf(dx,dy): make the rounding and no-rounding qdsp tables for sizes
      * [0] and [1] point at the matching h264 qpel function. */
 #define mcf(dx,dy)\
98a6806f
     s->qdsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
     s->qdsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
fc13a896
         s->h264qpel.put_h264_qpel_pixels_tab[0][dy+dx/4];\
98a6806f
     s->qdsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
     s->qdsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
fc13a896
         s->h264qpel.put_h264_qpel_pixels_tab[1][dy+dx/4];
d773d855
 
     mcf( 0, 0)
     mcf( 4, 0)
     mcf( 8, 0)
     mcf(12, 0)
     mcf( 0, 4)
     mcf( 4, 4)
     mcf( 8, 4)
     mcf(12, 4)
     mcf( 0, 8)
     mcf( 4, 8)
     mcf( 8, 8)
     mcf(12, 8)
     mcf( 0,12)
     mcf( 4,12)
     mcf( 8,12)
     mcf(12,12)
 
     /* mcfh(dx,dy): make the rounding and no-rounding hdsp tables point at
      * the mc_block_hpel<dx><dy>16 / ...8 functions of this file. */
 #define mcfh(dx,dy)\
771ba8f2
     s->hdsp.put_pixels_tab       [0][dy/4+dx/8]=\
     s->hdsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
d773d855
         mc_block_hpel ## dx ## dy ## 16;\
771ba8f2
     s->hdsp.put_pixels_tab       [1][dy/4+dx/8]=\
     s->hdsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
d773d855
         mc_block_hpel ## dx ## dy ## 8;
 
     mcfh(0, 0)
     mcfh(8, 0)
     mcfh(0, 8)
     mcfh(8, 8)
 
7f1b4270
     init_qexp();
d773d855
 
 //    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
 
     width= s->avctx->width;
     height= s->avctx->height;
 
0dadbbbf
     /* Work buffers sized from the codec dimensions: full-picture DWT/IDWT
      * buffers, one-line temporaries, and a run buffer with one entry per
      * 2x2 pixel group. Only the run buffer is not zero-initialised. */
     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->spatial_idwt_buffer, width, height * sizeof(IDWTELEM), fail);
     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->spatial_dwt_buffer,  width, height * sizeof(DWTELEM),  fail); //FIXME this does not belong here
     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->temp_dwt_buffer,     width, sizeof(DWTELEM),  fail);
     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->temp_idwt_buffer,    width, sizeof(IDWTELEM), fail);
     FF_ALLOC_ARRAY_OR_GOTO(avctx,  s->run_buffer,          ((width + 1) >> 1), ((height + 1) >> 1) * sizeof(*s->run_buffer), fail);
d773d855
 
c5362538
     /* ff_scale_mv_ref[i][j] is the ratio (i+1)/(j+1) scaled by 256; the
      * same loop allocates one AVFrame per reference slot. */
     for(i=0; i<MAX_REF_FRAMES; i++) {
d773d855
         for(j=0; j<MAX_REF_FRAMES; j++)
35e02a3d
             ff_scale_mv_ref[i][j] = 256*(i+1)/(j+1);
cf06dee5
         s->last_picture[i] = av_frame_alloc();
24b4e6c3
         if (!s->last_picture[i])
             goto fail;
c5362538
     }
 
cf06dee5
     s->mconly_picture = av_frame_alloc();
     s->current_picture = av_frame_alloc();
24b4e6c3
     if (!s->mconly_picture || !s->current_picture)
         goto fail;
d773d855
 
     return 0;
4d8516fd
 fail:
     return AVERROR(ENOMEM);
d773d855
 }
 
7f1b4270
 /**
  * Initialisation that depends on values known only after the header.
  *
  * On the first call (scratchbuf not yet allocated) a buffer is obtained for
  * mconly_picture and the scratch and edge-emulation buffers are allocated
  * from its line size. Afterwards the geometry of every subband of every
  * plane is computed and its x_coeff array is reallocated.
  *
  * @return 0 on success, the ff_get_buffer() error, AVERROR_INVALIDDATA if
  *         the pixel format no longer matches mconly_picture, or
  *         AVERROR(ENOMEM) on allocation failure
  */
 int ff_snow_common_init_after_header(AVCodecContext *avctx) {
     SnowContext *s = avctx->priv_data;
     int p, lvl, orient;
     int ret, emu_buf_size;

     if (!s->scratchbuf) {
         ret = ff_get_buffer(s->avctx, s->mconly_picture, AV_GET_BUFFER_FLAG_REF);
         if (ret < 0)
             return ret;
         FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->scratchbuf, FFMAX(s->mconly_picture->linesize[0], 2*avctx->width+256), 7*MB_SIZE, fail);
         emu_buf_size = FFMAX(s->mconly_picture->linesize[0], 2*avctx->width+256) * (2 * MB_SIZE + HTAPS_MAX - 1);
         FF_ALLOC_OR_GOTO(avctx, s->emu_edge_buffer, emu_buf_size, fail);
     }

     if (s->mconly_picture->format != avctx->pix_fmt) {
         av_log(avctx, AV_LOG_ERROR, "pixel format changed\n");
         return AVERROR_INVALIDDATA;
     }

     for (p = 0; p < s->nb_planes; p++) {
         Plane *const plane = &s->plane[p];
         int w = s->avctx->width;
         int h = s->avctx->height;

         if (p) {
             w = AV_CEIL_RSHIFT(w, s->chroma_h_shift);
             h = AV_CEIL_RSHIFT(h, s->chroma_v_shift);
         }
         plane->width  = w;
         plane->height = h;

         /* w and h are halved (rounding up) after each level is set up. */
         for (lvl = s->spatial_decomposition_count - 1; lvl >= 0; lvl--) {
             const int shift  = s->spatial_decomposition_count - lvl;
             const int half_w = (w + 1) >> 1;

             for (orient = lvl ? 1 : 0; orient < 4; orient++) {
                 SubBand *band = &plane->band[lvl][orient];
                 const int odd_x = orient & 1;
                 const int odd_y = orient > 1;

                 band->level       = lvl;
                 band->stride      = plane->width << shift;
                 band->stride_line = 1 << shift;
                 band->width       = (w + !odd_x) >> 1;
                 band->height      = (h + !odd_y) >> 1;

                 /* Start of the band inside the shared DWT buffer. */
                 band->buf          = s->spatial_dwt_buffer;
                 band->buf_x_offset = 0;
                 band->buf_y_offset = 0;
                 if (odd_x) {
                     band->buf         += half_w;
                     band->buf_x_offset = half_w;
                 }
                 if (odd_y) {
                     band->buf         += band->stride >> 1;
                     band->buf_y_offset = band->stride_line >> 1;
                 }
                 /* Same element offset inside the IDWT buffer. */
                 band->ibuf = s->spatial_idwt_buffer + (band->buf - s->spatial_dwt_buffer);

                 if (lvl)
                     band->parent = &plane->band[lvl - 1][orient];

                 //FIXME avoid this realloc
                 av_freep(&band->x_coeff);
                 band->x_coeff = av_mallocz_array(((band->width+1) * band->height+1), sizeof(x_and_coeff));
                 if (!band->x_coeff)
                     goto fail;
             }
             w = (w + 1) >> 1;
             h = (h + 1) >> 1;
         }
     }

     return 0;
 fail:
     return AVERROR(ENOMEM);
 }
 
 #define USE_HALFPEL_PLANE 0
 
f13e7331
 /**
  * Build the three half-pel planes of frame for every plane of the picture.
  *
  * halfpel[0][p] is set to the frame data itself; halfpel[1][p] is the
  * horizontally filtered plane, halfpel[2][p] the vertically filtered one and
  * halfpel[3][p] the vertical filter applied to halfpel[1][p]. The three new
  * planes are allocated with EDGE_WIDTH extra lines above and below and the
  * stored pointers are advanced by EDGE_WIDTH*(1+linesize).
  *
  * @return 0 on success, AVERROR(ENOMEM) if an allocation fails (the three
  *         pointers of the plane being processed are then freed and NULLed)
  */
 static int halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
     int p, k, x, y;

     for (p = 0; p < s->nb_planes; p++) {
         const int is_chroma = !!p;
         const int w  = is_chroma ? AV_CEIL_RSHIFT(s->avctx->width,  s->chroma_h_shift) : s->avctx->width;
         const int h  = is_chroma ? AV_CEIL_RSHIFT(s->avctx->height, s->chroma_v_shift) : s->avctx->height;
         const int ls = frame->linesize[p];
         uint8_t *src = frame->data[p];
         uint8_t *hor, *ver, *diag;

         for (k = 1; k < 4; k++)
             halfpel[k][p] = av_malloc_array(ls, (h + 2 * EDGE_WIDTH));
         if (!halfpel[1][p] || !halfpel[2][p] || !halfpel[3][p]) {
             for (k = 1; k < 4; k++)
                 av_freep(&halfpel[k][p]);
             return AVERROR(ENOMEM);
         }
         for (k = 1; k < 4; k++)
             halfpel[k][p] += EDGE_WIDTH * (1 + ls);

         halfpel[0][p] = src;
         hor  = halfpel[1][p];
         ver  = halfpel[2][p];
         diag = halfpel[3][p];

         /* Horizontal 6-tap filter on the frame data. */
         for (y = 0; y < h; y++) {
             for (x = 0; x < w; x++) {
                 const int i = y*ls + x;

                 hor[i] = (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
             }
         }
         /* Vertical 6-tap filter on the frame data. */
         for (y = 0; y < h; y++) {
             for (x = 0; x < w; x++) {
                 const int i = y*ls + x;

                 ver[i] = (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
             }
         }
         /* Vertical 6-tap filter on the horizontally filtered plane. */
         src = hor;
         for (y = 0; y < h; y++) {
             for (x = 0; x < w; x++) {
                 const int i = y*ls + x;

                 diag[i] = (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
             }
         }

 //FIXME border!
     }
     return 0;
 }
 
7f1b4270
 /**
  * Release the oldest reference picture (index max_ref_frames-1) if it holds
  * data, together with any half-pel planes stored for that slot.
  *
  * The stored half-pel pointers are offset by EDGE_WIDTH*(1+linesize), so
  * that offset is subtracted again before the memory is freed.
  */
 void ff_snow_release_buffer(AVCodecContext *avctx)
 {
     SnowContext *s = avctx->priv_data;
     const int oldest = s->max_ref_frames - 1;
     int kind, plane;

     if (!s->last_picture[oldest]->data[0])
         return;

     av_frame_unref(s->last_picture[oldest]);

     /* Half-pel kinds 1..3 for planes 0..2; kind 0 is not freed here. */
     for (kind = 1; kind < 4; kind++) {
         for (plane = 0; plane < 3; plane++) {
             uint8_t **slot = &s->halfpel_plane[oldest][kind][plane];

             if (*slot) {
                 av_free(*slot - EDGE_WIDTH*(1+s->current_picture->linesize[plane]));
                 *slot = NULL;
             }
         }
     }
 }
 
7f1b4270
 /**
  * Rotate the picture buffers for a new frame.
  *
  * The oldest reference is released and its AVFrame recycled as the new
  * current picture, the previous current picture becomes last_picture[0],
  * the number of usable references is determined, and a buffer is obtained
  * for the new current picture.
  *
  * @return 0 on success, AVERROR_INVALIDDATA when a non-key frame has no
  *         reference, or the error of halfpel_interpol()/ff_snow_get_buffer()
  */
 int ff_snow_frame_start(SnowContext *s){
     AVFrame *recycled;
     int i, ret;

     ff_snow_release_buffer(s->avctx);

     /* Shift the references down by one slot, keeping the oldest frame. */
     recycled = s->last_picture[s->max_ref_frames-1];
     for (i = s->max_ref_frames-1; i > 0; i--)
         s->last_picture[i] = s->last_picture[i-1];
     memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);

     if (USE_HALFPEL_PLANE && s->current_picture->data[0]) {
         ret = halfpel_interpol(s, s->halfpel_plane[0], s->current_picture);
         if (ret < 0)
             return ret;
     }

     s->last_picture[0] = s->current_picture;
     s->current_picture = recycled;

     if (s->keyframe) {
         s->ref_frames = 0;
     } else {
         /* Count references that hold data, stopping after a key frame. */
         i = 0;
         while (i < s->max_ref_frames && s->last_picture[i]->data[0]) {
             if (i && s->last_picture[i-1]->key_frame)
                 break;
             i++;
         }
         s->ref_frames = i;
         if (!s->ref_frames) {
             av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
             return AVERROR_INVALIDDATA;
         }
     }

     ret = ff_snow_get_buffer(s, s->current_picture);
     if (ret < 0)
         return ret;

     s->current_picture->key_frame = s->keyframe;

     return 0;
 }
 
7f1b4270
 /**
  * Free everything owned by the context: the DWT/IDWT work buffers, the run
  * buffer, the motion-estimation scratch memory, the block array, the scratch
  * and edge-emulation buffers, the per-reference mv/score tables and frames,
  * every subband's x_coeff array, and the mconly/current frames.
  */
 av_cold void ff_snow_common_end(SnowContext *s)
 {
     int p, lvl, orient, ref;

     av_freep(&s->spatial_dwt_buffer);
     av_freep(&s->temp_dwt_buffer);
     av_freep(&s->spatial_idwt_buffer);
     av_freep(&s->temp_idwt_buffer);
     av_freep(&s->run_buffer);

     s->m.me.temp = NULL;
     av_freep(&s->m.me.scratchpad);
     av_freep(&s->m.me.map);
     av_freep(&s->m.me.score_map);
     av_freep(&s->m.sc.obmc_scratchpad);

     av_freep(&s->block);
     av_freep(&s->scratchbuf);
     av_freep(&s->emu_edge_buffer);

     for (ref = 0; ref < MAX_REF_FRAMES; ref++) {
         AVFrame *pic = s->last_picture[ref];

         av_freep(&s->ref_mvs[ref]);
         av_freep(&s->ref_scores[ref]);
         /* A reference holding data must not share it with the current picture. */
         if (pic && pic->data[0]) {
             av_assert0(pic->data[0] != s->current_picture->data[0]);
         }
         av_frame_free(&s->last_picture[ref]);
     }

     for (p = 0; p < MAX_PLANES; p++) {
         for (lvl = MAX_DECOMPOSITIONS-1; lvl >= 0; lvl--) {
             for (orient = lvl ? 1 : 0; orient < 4; orient++)
                 av_freep(&s->plane[p].band[lvl][orient].x_coeff);
         }
     }

     av_frame_free(&s->mconly_picture);
     av_frame_free(&s->current_picture);
 }