libavcodec/snow.c
791e7b83
 /*
  * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
  *
b78e7197
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
791e7b83
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
b78e7197
  * version 2.1 of the License, or (at your option) any later version.
791e7b83
  *
b78e7197
  * FFmpeg is distributed in the hope that it will be useful,
791e7b83
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
b78e7197
  * License along with FFmpeg; if not, write to the Free Software
5509bffa
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
791e7b83
  */
 
94ca624f
 #include "libavutil/intmath.h"
0cc06b9e
 #include "libavutil/log.h"
 #include "libavutil/opt.h"
791e7b83
 #include "avcodec.h"
 #include "dsputil.h"
30981a96
 #include "snow_dwt.h"
874c5b02
 #include "internal.h"
059715a4
 #include "snow.h"
7f1b4270
 #include "snowdata.h"
28869757
 
 #include "rangecoder.h"
199436b9
 #include "mathops.h"
c26e58e3
 #include "h263.h"
791e7b83
 
6b2f6646
 
7f1b4270
 /**
  * Combine four prediction blocks with the OBMC weight window and either
  * add the result to, or subtract it from, the slice buffer lines.
  *
  * For every pixel the four samples block[3..0] are multiplied by the four
  * weights read from the obmc table and summed. With add != 0 the sum is added
  * to the slice-buffer value, rounded down to pixel precision, clamped to
  * 0..255 and written to dst8; otherwise the sum is subtracted from the
  * slice-buffer value in place.
  *
  * @param obmc        weight table; four sub-windows are addressed from it
  * @param obmc_stride stride of the weight table (also used to locate the sub-windows)
  * @param block       array of four source block pointers, all using src_stride
  * @param b_w         number of columns to process
  * @param b_h         number of rows to process
  * @param src_x       column offset inside each slice-buffer line
  * @param src_y       first slice-buffer line to touch
  * @param src_stride  stride of the blocks and of dst8
  * @param sb          slice buffer the lines are fetched from
  * @param add         non-zero: reconstruct into dst8; zero: subtract the prediction
  * @param dst8        8-bit output, only written when add is non-zero
  */
 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
                               int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
     const int half = obmc_stride >> 1;
     int row;

     for (row = 0; row < b_h; row++) {
         /* FIXME ugly misuse of obmc_stride */
         const uint8_t *wa = obmc + row * obmc_stride;
         const uint8_t *wb = wa + half;
         const uint8_t *wc = wa + obmc_stride * half;
         const uint8_t *wd = wc + half;
         const int off = row * src_stride;
         IDWTELEM *line = slice_buffer_get_line(sb, src_y + row);
         int col;

         for (col = 0; col < b_w; col++) {
             int acc = wa[col] * block[3][col + off]
                     + wb[col] * block[2][col + off]
                     + wc[col] * block[1][col + off]
                     + wd[col] * block[0][col + off];

             /* bring the weighted sum to FRAC_BITS fractional bits */
             acc <<= 8 - LOG2_OBMC_MAX;
             if (FRAC_BITS != 8)
                 acc >>= 8 - FRAC_BITS;

             if (!add) {
                 line[col + src_x] -= acc;
                 continue;
             }

             acc += line[col + src_x];
             acc  = (acc + (1 << (FRAC_BITS - 1))) >> FRAC_BITS;
             /* clamp: negative -> 0, above 255 -> -1 which is stored as 255 */
             if (acc & ~255)
                 acc = ~(acc >> 31);
             dst8[col + off] = acc;
         }
     }
 }
 
7f1b4270
 /**
  * Set every adaptive coder state kept in the context to MID_STATE.
  *
  * This covers the per-subband state arrays of all three planes and all
  * MAX_DECOMPOSITIONS levels, plus the header and block state arrays.
  */
 void ff_snow_reset_contexts(SnowContext *s){ //FIXME better initial contexts
     int p, lvl, ori;

     for (p = 0; p < 3; p++) {
         for (lvl = 0; lvl < MAX_DECOMPOSITIONS; lvl++) {
             /* orientation 0 is only visited at level 0 */
             for (ori = lvl ? 1 : 0; ori < 4; ori++) {
                 SubBand *band = &s->plane[p].band[lvl][ori];

                 memset(band->state, MID_STATE, sizeof(band->state));
             }
         }
     }

     memset(s->header_state, MID_STATE, sizeof(s->header_state));
     memset(s->block_state,  MID_STATE, sizeof(s->block_state));
 }
 
7f1b4270
 /**
  * (Re)allocate the zero-initialised BlockNode array for the current picture size.
  *
  * The picture dimensions are divided by the macroblock size, rounding up,
  * and stored in s->b_width / s->b_height. Any previous array is freed.
  * The new array holds b_width * b_height nodes scaled by
  * 4^block_max_depth.
  *
  * @return 0 on success, AVERROR(ENOMEM) if the allocation fails
  *         (s->block is NULL in that case)
  */
 int ff_snow_alloc_blocks(SnowContext *s){
     /* -((-x) >> n) is a ceiling division by 2^n */
     int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
     int h= -((-s->avctx->height)>>LOG2_MB_SIZE);

     s->b_width = w;
     s->b_height= h;

     av_free(s->block);
     s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
     if (!s->block)
         return AVERROR(ENOMEM);

     return 0;
 }
 
7f1b4270
 static void init_qexp(void){
155ec6ed
     int i;
7f1b4270
     double v=128;
155ec6ed
 
7f1b4270
     for(i=0; i<QROOT; i++){
35e02a3d
         ff_qexp[i]= lrintf(v);
7f1b4270
         v *= pow(2, 1.0 / QROOT);
155ec6ed
     }
791e7b83
 }
5262f7ed
 /*
  * Sub-pel interpolation of a b_w x b_h block from src into dst.
  *
  * dx and dy are the fractional offsets and must both be below 16 (asserted).
  * p supplies the filter setup (diag_mc, fast_mc, hcoeff[]); it may be NULL,
  * in which case the fixed 20/-5/1 filter is used.
  *
  * src is advanced by HTAPS_MAX/2-1 columns and then HTAPS_MAX/2-1 rows before
  * it is used as the unfiltered plane, so the caller has to pass a pointer that
  * is that far up and left of the block (the mca() wrappers and
  * ff_snow_pred_block() both do this). src and dst share the same stride.
  *
  * Up to three filtered planes are built in 64-element-wide temporaries
  * (horizontal, vertical, and a vertical pass over the unrounded horizontal
  * result). The output is then either a bilinear mix of four planes (b == 15)
  * or a weighted mix of the two planes selected through brane[].
  */
 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int b_w, int b_h, int dx, int dy){
7942269b
     /* weight (out of 8) given to the first plane in the two-plane path,
      * indexed by (dx&7) + 8*(dy&7); the second plane gets 8 minus this */
     static const uint8_t weight[64]={
a68ca08e
     8,7,6,5,4,3,2,1,
     7,7,0,0,0,0,0,1,
     6,0,6,0,0,0,2,0,
     5,0,0,5,0,3,0,0,
     4,0,0,0,4,0,0,0,
     3,0,0,5,0,3,0,0,
     2,0,6,0,0,0,2,0,
     1,7,0,0,0,0,0,1,
     };
 
7942269b
     /* indexed by dx + 16*dy; the high and low nibble are each an index into
      * hpel[] (and needs[]). 0xcc yields index 12, whose needs[] entry is 15 */
     static const uint8_t brane[256]={
a68ca08e
     0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
     0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
     0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
     0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
     0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
     0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
     0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
     0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
     0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
     0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
     0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
     0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
     0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
     0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
     0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
     0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
     };
 
7942269b
     /* per hpel[] index: bitmask of the filter passes that must run.
      * The horizontal pass runs for bits 0 or 2 (b&5), the vertical pass for
      * bit 1 (b&2), the pass over tmpI for bit 2 (b&4). Entries that are not
      * listed are zero. */
     static const uint8_t needs[16]={
a68ca08e
     0,1,0,0,
     2,4,2,0,
     0,1,0,0,
     15
     };
 
     int x, y, b, r, l;
61d6e445
     /* unrounded horizontal filter output, 64 elements per row */
     int16_t tmpIt   [64*(32+HTAPS_MAX)];
58e57c30
     /* [0] horizontal, [1] vertical, [2] result of filtering tmpIt vertically;
      * all clipped to 8 bits, 64 bytes per row */
     uint8_t tmp2t[3][64*(32+HTAPS_MAX)];
a68ca08e
     int16_t *tmpI= tmpIt;
     uint8_t *tmp2= tmp2t[0];
f0a70840
     /* plane pointers; only indices 0,1,2,4,5,6,8,9,10 are assigned below */
     const uint8_t *hpel[11];
be64d5f5
     av_assert2(dx<16 && dy<16);
a68ca08e
     r= brane[dx + 16*dy]&15;
     l= brane[dx + 16*dy]>>4;
 
     b= needs[l] | needs[r];
7d7f57d9
     /* with a Plane whose diag_mc is off, always build all planes and take
      * the bilinear path */
     if(p && !p->diag_mc)
         b= 15;
a68ca08e
 
     /* horizontal pass over b_h+HTAPS_MAX-1 rows so that the pass over tmpI
      * below has the extra rows it reads */
     if(b&5){
61d6e445
         for(y=0; y < b_h+HTAPS_MAX-1; y++){
65dc0f53
             for(x=0; x < b_w; x++){
61d6e445
                 int a_1=src[x + HTAPS_MAX/2-4];
                 int a0= src[x + HTAPS_MAX/2-3];
                 int a1= src[x + HTAPS_MAX/2-2];
                 int a2= src[x + HTAPS_MAX/2-1];
                 int a3= src[x + HTAPS_MAX/2+0];
                 int a4= src[x + HTAPS_MAX/2+1];
                 int a5= src[x + HTAPS_MAX/2+2];
                 int a6= src[x + HTAPS_MAX/2+3];
7d7f57d9
                 int am=0;
                 /* fixed 6-tap filter (scale 32) or the 8-tap hcoeff[] filter
                  * (scale 64); the unscaled sum is kept in tmpI */
                 if(!p || p->fast_mc){
                     am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
                     tmpI[x]= am;
                     am= (am+16)>>5;
                 }else{
                     am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
                     tmpI[x]= am;
                     am= (am+32)>>6;
                 }
791e7b83
 
65dc0f53
                 /* clamp to 0..255: negative -> 0, too large -> -1 which is
                  * stored as 255 in the uint8_t */
                 if(am&(~255)) am= ~(am>>31);
                 tmp2[x]= am;
             }
             tmpI+= 64;
58e57c30
             tmp2+= 64;
65dc0f53
             src += stride;
791e7b83
         }
65dc0f53
         /* rewind src to the first row */
         src -= stride*y;
a68ca08e
     }
61d6e445
     src += HTAPS_MAX/2 - 1;
a68ca08e
     tmp2= tmp2t[1];
115329f1
 
a68ca08e
     /* vertical pass on the source; b_w+1 columns because hpel[6] reads this
      * plane shifted right by one */
     if(b&2){
65dc0f53
         for(y=0; y < b_h; y++){
             for(x=0; x < b_w+1; x++){
61d6e445
                 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
                 int a0= src[x + (HTAPS_MAX/2-3)*stride];
                 int a1= src[x + (HTAPS_MAX/2-2)*stride];
                 int a2= src[x + (HTAPS_MAX/2-1)*stride];
                 int a3= src[x + (HTAPS_MAX/2+0)*stride];
                 int a4= src[x + (HTAPS_MAX/2+1)*stride];
                 int a5= src[x + (HTAPS_MAX/2+2)*stride];
                 int a6= src[x + (HTAPS_MAX/2+3)*stride];
7d7f57d9
                 int am=0;
                 if(!p || p->fast_mc)
                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
                 else
                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
791e7b83
 
65dc0f53
                 if(am&(~255)) am= ~(am>>31);
                 tmp2[x]= am;
             }
             src += stride;
58e57c30
             tmp2+= 64;
a68ca08e
         }
65dc0f53
         src -= stride*y;
a68ca08e
     }
61d6e445
     src += stride*(HTAPS_MAX/2 - 1);
a68ca08e
     tmp2= tmp2t[2];
     tmpI= tmpIt;
     /* vertical pass over the unrounded horizontal output; the shift is the
      * sum of both passes' scales (10 or 12 bits) */
     if(b&4){
         for(y=0; y < b_h; y++){
             for(x=0; x < b_w; x++){
61d6e445
                 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
                 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
                 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
                 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
                 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
                 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
                 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
                 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
7d7f57d9
                 int am=0;
                 if(!p || p->fast_mc)
                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
                 else
                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
a68ca08e
                 if(am&(~255)) am= ~(am>>31);
                 tmp2[x]= am;
             }
             tmpI+= 64;
58e57c30
             tmp2+= 64;
a68ca08e
         }
     }
115329f1
 
a68ca08e
     /* plane table laid out as rows of four: index + 1 is the plane to the
      * right, index + 4 the plane below */
     hpel[ 0]= src;
58e57c30
     /* skip the extra top rows produced by the horizontal pass */
     hpel[ 1]= tmp2t[0] + 64*(HTAPS_MAX/2-1);
a68ca08e
     hpel[ 2]= src + 1;
 
     hpel[ 4]= tmp2t[1];
     hpel[ 5]= tmp2t[2];
     hpel[ 6]= tmp2t[1] + 1;
 
     hpel[ 8]= src + stride;
58e57c30
     hpel[ 9]= hpel[1] + 64;
a68ca08e
     hpel[10]= hpel[8] + 1;
 
58e57c30
 /* planes with a non-zero needs[] entry live in the 64-wide temporaries,
  * the others point into the source picture */
 #define MC_STRIDE(x) (needs[x] ? 64 : stride)
 
a68ca08e
     if(b==15){
58e57c30
         /* bilinear mix of the four planes around the position; the weights
          * sum to 64 */
         int dxy = dx / 8 + dy / 8 * 4;
         const uint8_t *src1 = hpel[dxy    ];
         const uint8_t *src2 = hpel[dxy + 1];
         const uint8_t *src3 = hpel[dxy + 4];
         const uint8_t *src4 = hpel[dxy + 5];
         int stride1 = MC_STRIDE(dxy);
         int stride2 = MC_STRIDE(dxy + 1);
         int stride3 = MC_STRIDE(dxy + 4);
         int stride4 = MC_STRIDE(dxy + 5);
a68ca08e
         dx&=7;
         dy&=7;
         for(y=0; y < b_h; y++){
             for(x=0; x < b_w; x++){
                 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
                          (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
             }
58e57c30
             src1+=stride1;
             src2+=stride2;
             src3+=stride3;
             src4+=stride4;
a68ca08e
             dst +=stride;
         }
     }else{
f0a70840
         /* weighted mix of the two planes chosen by brane[] */
         const uint8_t *src1= hpel[l];
         const uint8_t *src2= hpel[r];
58e57c30
         int stride1 = MC_STRIDE(l);
         int stride2 = MC_STRIDE(r);
a68ca08e
         int a= weight[((dx&7) + (8*(dy&7)))];
         /* note: shadows the outer b (the needs mask) inside this branch */
         int b= 8-a;
         for(y=0; y < b_h; y++){
             for(x=0; x < b_w; x++){
                 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
             }
58e57c30
             src1+=stride1;
             src2+=stride2;
a68ca08e
             dst +=stride;
791e7b83
         }
     }
 }
 
7f1b4270
 /**
  * Produce the prediction for one block of one plane into dst.
  *
  * Intra blocks are filled with the block's flat colour for this plane.
  * Inter blocks are motion compensated from s->last_picture[block->ref]:
  * the motion vector is scaled to 1/16 pel, the source pointer is moved
  * HTAPS_MAX/2-1 samples up and left for the filter taps, and the block is
  * interpolated either by mc_block() or by the H.264 qpel functions.
  *
  * @param s           codec context
  * @param dst         destination, addressed with stride
  * @param tmp         scratch memory used (at tmp + MB_SIZE) when the source
  *                    area has to be edge-emulated
  * @param stride      line size shared by dst, the reference plane and tmp
  * @param sx          x position of the block in the plane
  * @param sy          y position of the block in the plane
  * @param b_w         block width
  * @param b_h         block height
  * @param block       block description (type, colour, reference, motion vector)
  * @param plane_index plane being predicted; non-zero planes use the chroma mv scale
  * @param w           plane width handed to the edge emulation
  * @param h           plane height handed to the edge emulation
  */
 void ff_snow_pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
     if(block->type & BLOCK_INTRA){
         int y;
         const unsigned color  = block->color[plane_index];

         /* A byte-wise fill writes exactly what the former 32-bit stores
          * wrote, without requiring dst to be 4-byte aligned. */
         if(b_w > 0){
             for(y=0; y < b_h; y++)
                 memset(dst + y*stride, color, b_w);
         }
     }else{
         uint8_t *src= s->last_picture[block->ref].data[plane_index];
         const int scale= plane_index ?  (2*s->mv_scale)>>s->chroma_h_shift : 2*s->mv_scale;
         int mx= block->mx*scale;
         int my= block->my*scale;
         /* fractional part of the vector in 1/16 pel */
         const int dx= mx&15;
         const int dy= my&15;
         const int tab_index= 3 - (b_w>>2) + (b_w>>4);
         /* integer part of the vector, moved up/left to make room for the filter taps */
         sx += (mx>>4) - (HTAPS_MAX/2-1);
         sy += (my>>4) - (HTAPS_MAX/2-1);
         src += sx + sy*stride;
         /* the unsigned compare also catches negative positions */
         if(   (unsigned)sx >= FFMAX(w - b_w - (HTAPS_MAX-2), 0)
            || (unsigned)sy >= FFMAX(h - b_h - (HTAPS_MAX-2), 0)){
             s->vdsp.emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
             src= tmp + MB_SIZE;
         }

         av_assert2(s->chroma_h_shift == s->chroma_v_shift); // only one mv_scale

 //        assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
 //        assert(!(b_w&(b_w-1)));
         av_assert2(b_w>1 && b_h>1);
         av_assert2((tab_index>=0 && tab_index<4) || b_w==32);
         /* the qpel functions are only used for quarter-pel vectors on
          * power-of-two blocks with aspect 1:1, 2:1 or 1:2 when fast_mc is set */
         if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
             mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy);
         else if(b_w==32){
             int y;
             /* width 32 is covered by two 16-wide calls per 16 rows */
             for(y=0; y<b_h; y+=16){
                 s->h264qpel.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
                 s->h264qpel.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
             }
         }else if(b_w==b_h)
             s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
         else if(b_w==2*b_h){
             /* wide block: two square halves side by side */
             s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
             s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
         }else{
             /* tall block: two square halves stacked */
             av_assert2(2*b_w==b_h);
             s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
             s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
         }
     }
 }
 
7f1b4270
 /*
  * mca(dx,dy,b_w) defines mc_block_hpel<dx><dy><b_w>(), a wrapper around
  * mc_block() with no Plane (p == NULL), a square b_w x b_w block and the
  * given fixed fractional offset. src is moved HTAPS_MAX/2-1 samples up and
  * left first because mc_block() expects that offset. h is only checked
  * against b_w by the assertion. ff_snow_common_init() stores the generated
  * functions in the dsp put_pixels tables.
  */
 #define mca(dx,dy,b_w)\
0ddca7d4
 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int h){\
be64d5f5
     av_assert2(h==b_w);\
7f1b4270
     mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
d773d855
 }
 
7f1b4270
 /* offsets 0 and 8 in each direction, for block sizes 16 and 8 */
 mca( 0, 0,16)
 mca( 8, 0,16)
 mca( 0, 8,16)
 mca( 8, 8,16)
 mca( 0, 0,8)
 mca( 8, 0,8)
 mca( 0, 8,8)
 mca( 8, 8,8)
d773d855
 
7f1b4270
 /**
  * Initialisation shared by the Snow encoder and decoder.
  *
  * Sets up the DSP contexts, points the dsputil qpel and pixel tables at the
  * H.264 qpel functions and the mc_block_hpel* wrappers, fills ff_qexp and
  * ff_scale_mv_ref, and allocates the DWT and run buffers for the current
  * avctx dimensions.
  *
  * @return 0 on success, AVERROR(ENOMEM) if a buffer cannot be allocated
  */
 av_cold int ff_snow_common_init(AVCodecContext *avctx){
     SnowContext *s = avctx->priv_data;
     int width, height;
     int i, j;

     s->avctx= avctx;
     s->max_ref_frames=1; //just make sure it's not an invalid value in case of no initial keyframe

     ff_dsputil_init(&s->dsp, avctx);
     ff_videodsp_init(&s->vdsp, 8);
     ff_dwt_init(&s->dwt);
     ff_h264qpel_init(&s->h264qpel, 8);

     /* all 16 quarter-pel positions of the 16x16 and 8x8 tables */
     for (i = 0; i < 16; i++) {
         for (j = 0; j < 2; j++) {
             s->dsp.put_qpel_pixels_tab       [j][i] =
             s->dsp.put_no_rnd_qpel_pixels_tab[j][i] =
                 s->h264qpel.put_h264_qpel_pixels_tab[j][i];
         }
     }

     /* half-pel tables: index = dy/4 + dx/8 for dx, dy in {0, 8} */
     s->dsp.put_pixels_tab[0][0] = s->dsp.put_no_rnd_pixels_tab[0][0] = mc_block_hpel0016;
     s->dsp.put_pixels_tab[1][0] = s->dsp.put_no_rnd_pixels_tab[1][0] = mc_block_hpel008;
     s->dsp.put_pixels_tab[0][1] = s->dsp.put_no_rnd_pixels_tab[0][1] = mc_block_hpel8016;
     s->dsp.put_pixels_tab[1][1] = s->dsp.put_no_rnd_pixels_tab[1][1] = mc_block_hpel808;
     s->dsp.put_pixels_tab[0][2] = s->dsp.put_no_rnd_pixels_tab[0][2] = mc_block_hpel0816;
     s->dsp.put_pixels_tab[1][2] = s->dsp.put_no_rnd_pixels_tab[1][2] = mc_block_hpel088;
     s->dsp.put_pixels_tab[0][3] = s->dsp.put_no_rnd_pixels_tab[0][3] = mc_block_hpel8816;
     s->dsp.put_pixels_tab[1][3] = s->dsp.put_no_rnd_pixels_tab[1][3] = mc_block_hpel888;

     init_qexp();

     width  = s->avctx->width;
     height = s->avctx->height;

     FF_ALLOCZ_OR_GOTO(avctx, s->spatial_idwt_buffer, width * height * sizeof(IDWTELEM), fail);
     FF_ALLOCZ_OR_GOTO(avctx, s->spatial_dwt_buffer,  width * height * sizeof(DWTELEM),  fail); //FIXME this does not belong here
     FF_ALLOCZ_OR_GOTO(avctx, s->temp_dwt_buffer,     width * sizeof(DWTELEM),  fail);
     FF_ALLOCZ_OR_GOTO(avctx, s->temp_idwt_buffer,    width * sizeof(IDWTELEM), fail);
     FF_ALLOC_OR_GOTO(avctx,  s->run_buffer,          ((width + 1) >> 1) * ((height + 1) >> 1) * sizeof(*s->run_buffer), fail);

     /* 8.8 fixed-point ratio (i+1)/(j+1) for every pair of reference indices */
     for (i = 0; i < MAX_REF_FRAMES; i++) {
         for (j = 0; j < MAX_REF_FRAMES; j++) {
             ff_scale_mv_ref[i][j] = 256*(i+1)/(j+1);
         }
     }

     return 0;
 fail:
     return AVERROR(ENOMEM);
 }
 
7f1b4270
 /**
  * Second-stage initialisation that depends on values known only after the
  * header (chroma shifts, spatial decomposition count).
  *
  * On the first call it obtains s->mconly_picture and allocates the scratch
  * and edge-emulation buffers. On every call it recomputes the plane sizes
  * and the geometry of every subband, and reallocates each subband's
  * x_coeff array.
  *
  * @return 0 on success, the ff_get_buffer() error code, AVERROR_INVALIDDATA
  *         if the pixel format no longer matches mconly_picture, or
  *         AVERROR(ENOMEM) if an allocation fails
  */
 int ff_snow_common_init_after_header(AVCodecContext *avctx) {
     SnowContext *s = avctx->priv_data;
     int plane_index, level, orientation;
     int ret, emu_buf_size;

     if(!s->scratchbuf) {
         if ((ret = ff_get_buffer(s->avctx, &s->mconly_picture)) < 0) {
             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
             return ret;
         }
         FF_ALLOCZ_OR_GOTO(avctx, s->scratchbuf, FFMAX(s->mconly_picture.linesize[0], 2*avctx->width+256)*7*MB_SIZE, fail);
         emu_buf_size = FFMAX(s->mconly_picture.linesize[0], 2*avctx->width+256) * (2 * MB_SIZE + HTAPS_MAX - 1);
         FF_ALLOC_OR_GOTO(avctx, s->emu_edge_buffer, emu_buf_size, fail);
     }

     if(s->mconly_picture.format != avctx->pix_fmt) {
         av_log(avctx, AV_LOG_ERROR, "pixel format changed\n");
         return AVERROR_INVALIDDATA;
     }

     for(plane_index=0; plane_index<3; plane_index++){
         int w= s->avctx->width;
         int h= s->avctx->height;

         if(plane_index){
             w>>= s->chroma_h_shift;
             h>>= s->chroma_v_shift;
         }
         s->plane[plane_index].width = w;
         s->plane[plane_index].height= h;

         /* walk from the last level down to level 0; w and h are halved
          * (rounding up) after each level */
         for(level=s->spatial_decomposition_count-1; level>=0; level--){
             for(orientation=level ? 1 : 0; orientation<4; orientation++){
                 SubBand *b= &s->plane[plane_index].band[level][orientation];

                 b->buf= s->spatial_dwt_buffer;
                 b->level= level;
                 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
                 b->width = (w + !(orientation&1))>>1;
                 b->height= (h + !(orientation>1))>>1;

                 b->stride_line = 1 << (s->spatial_decomposition_count - level);
                 b->buf_x_offset = 0;
                 b->buf_y_offset = 0;

                 /* odd orientations start in the right half of the row */
                 if(orientation&1){
                     b->buf += (w+1)>>1;
                     b->buf_x_offset = (w+1)>>1;
                 }
                 /* orientations 2 and 3 start half a stride further on */
                 if(orientation>1){
                     b->buf += b->stride>>1;
                     b->buf_y_offset = b->stride_line >> 1;
                 }
                 /* same element offset inside the IDWT buffer */
                 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);

                 if(level)
                     b->parent= &s->plane[plane_index].band[level-1][orientation];
                 //FIXME avoid this realloc
                 av_freep(&b->x_coeff);
                 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
                 /* do not leave a NULL coefficient list behind for later use */
                 if(!b->x_coeff)
                     goto fail;
             }
             w= (w+1)>>1;
             h= (h+1)>>1;
         }
     }

     return 0;
 fail:
     return AVERROR(ENOMEM);
 }
 
 #define USE_HALFPEL_PLANE 0
 
 /**
  * Build half-pel interpolated copies of the three planes of frame.
  *
  * For each plane p, halfpel[0][p] is set to the frame data itself and
  * halfpel[1..3][p] to freshly allocated buffers offset by EDGE_WIDTH rows
  * and columns: [1] is the horizontal 6-tap (20,-5,1) filter of the frame,
  * [2] the vertical one, and [3] the vertical filter applied to [1].
  * Results are stored as bytes without clamping, and the allocations are
  * not checked.
  */
 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
     int plane;

     for (plane = 0; plane < 3; plane++) {
         const int chroma = !!plane;
         const int w  = chroma ? s->avctx->width >>s->chroma_h_shift : s->avctx->width;
         const int h  = chroma ? s->avctx->height>>s->chroma_v_shift : s->avctx->height;
         const int ls = frame->linesize[plane];
         uint8_t *pic = frame->data[plane];
         uint8_t *hor, *ver, *both;
         int x, y;

         hor  = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls);
         ver  = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls);
         both = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls);

         halfpel[0][plane] = pic;
         halfpel[1][plane] = hor;
         halfpel[2][plane] = ver;
         halfpel[3][plane] = both;

         /* horizontal and vertical filters both read only the frame */
         for (y = 0; y < h; y++) {
             for (x = 0; x < w; x++) {
                 const int i = y*ls + x;

                 hor[i] = (20*(pic[i] + pic[i+1 ]) - 5*(pic[i-1 ] + pic[i+2   ]) + (pic[i-2   ] + pic[i+3   ]) + 16 )>>5;
                 ver[i] = (20*(pic[i] + pic[i+ls]) - 5*(pic[i-ls] + pic[i+2*ls]) + (pic[i-2*ls] + pic[i+3*ls]) + 16 )>>5;
             }
         }

         /* vertical filter over the finished horizontal plane */
         for (y = 0; y < h; y++) {
             for (x = 0; x < w; x++) {
                 const int i = y*ls + x;

                 both[i] = (20*(hor[i] + hor[i+ls]) - 5*(hor[i-ls] + hor[i+2*ls]) + (hor[i-2*ls] + hor[i+3*ls]) + 16 )>>5;
             }
         }

 //FIXME border!
     }
 }
 
7f1b4270
 /**
  * Release the oldest reference picture (index max_ref_frames-1), if it
  * holds data, together with its half-pel planes.
  *
  * The half-pel plane pointers are stored offset by
  * EDGE_WIDTH*(1+linesize); the offset is undone here using
  * s->current_picture's line sizes before the memory is freed.
  */
 void ff_snow_release_buffer(AVCodecContext *avctx)
 {
     SnowContext *s = avctx->priv_data;
     const int oldest = s->max_ref_frames - 1;
     int k, p;

     if (!s->last_picture[oldest].data[0])
         return;

     avctx->release_buffer(avctx, &s->last_picture[oldest]);

     /* planes [1..3][0..2]; entry [0][*] is the picture itself */
     for (k = 1; k <= 3; k++) {
         for (p = 0; p < 3; p++) {
             uint8_t *hp = s->halfpel_plane[oldest][k][p];

             if (hp)
                 av_free(hp - EDGE_WIDTH*(1+s->current_picture.linesize[p]));
         }
     }
 }
 
7f1b4270
 /**
  * Prepare the context for coding a new frame.
  *
  * Pads the edges of the current picture (unless CODEC_FLAG_EMU_EDGE is
  * set), releases the oldest reference, shifts the reference list by one
  * so that the current picture becomes last_picture[0], determines how many
  * references are usable and obtains a buffer for the new current picture.
  *
  * @return 0 on success, AVERROR_INVALIDDATA if a non-keyframe has no
  *         reference frame, or the negative error code of ff_get_buffer()
  */
 int ff_snow_frame_start(SnowContext *s){
     AVFrame tmp;
     int ret;
     int w= s->avctx->width; //FIXME round up to x16 ?
     int h= s->avctx->height;

     if (s->current_picture.data[0] && !(s->avctx->flags&CODEC_FLAG_EMU_EDGE)) {
         s->dsp.draw_edges(s->current_picture.data[0],
                           s->current_picture.linesize[0], w   , h   ,
                           EDGE_WIDTH  , EDGE_WIDTH  , EDGE_TOP | EDGE_BOTTOM);
         s->dsp.draw_edges(s->current_picture.data[1],
                           s->current_picture.linesize[1], w>>s->chroma_h_shift, h>>s->chroma_v_shift,
                           EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM);
         s->dsp.draw_edges(s->current_picture.data[2],
                           s->current_picture.linesize[2], w>>s->chroma_h_shift, h>>s->chroma_v_shift,
                           EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM);
     }

     ff_snow_release_buffer(s->avctx);

     /* rotate: the released oldest slot becomes the new current picture and
      * the old current picture becomes reference 0 */
     tmp= s->last_picture[s->max_ref_frames-1];
     memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
     memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
     if(USE_HALFPEL_PLANE && s->current_picture.data[0])
         halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
     s->last_picture[0]= s->current_picture;
     s->current_picture= tmp;

     if(s->keyframe){
         s->ref_frames= 0;
     }else{
         int i;
         /* count references with data, stopping after the most recent keyframe */
         for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
             if(i && s->last_picture[i-1].key_frame)
                 break;
         s->ref_frames= i;
         if(s->ref_frames==0){
             av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
             return AVERROR_INVALIDDATA;
         }
     }

     s->current_picture.reference= 3;
     if((ret = ff_get_buffer(s->avctx, &s->current_picture)) < 0){
         av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
         return ret;
     }

     s->current_picture.key_frame= s->keyframe;

     return 0;
 }
 
7f1b4270
 /**
  * Free everything owned by the Snow context: DWT and run buffers, the
  * motion-estimation scratch areas, the block array, scratch and edge
  * buffers, per-reference mv/score tables, subband coefficient lists, and
  * all pictures that still hold data.
  */
 av_cold void ff_snow_common_end(SnowContext *s)
 {
     int p, lvl, ori, i;

     av_freep(&s->spatial_dwt_buffer);
     av_freep(&s->temp_dwt_buffer);
     av_freep(&s->spatial_idwt_buffer);
     av_freep(&s->temp_idwt_buffer);
     av_freep(&s->run_buffer);

     s->m.me.temp = NULL;
     av_freep(&s->m.me.scratchpad);
     av_freep(&s->m.me.map);
     av_freep(&s->m.me.score_map);
     av_freep(&s->m.obmc_scratchpad);

     av_freep(&s->block);
     av_freep(&s->scratchbuf);
     av_freep(&s->emu_edge_buffer);

     for (i = 0; i < MAX_REF_FRAMES; i++) {
         av_freep(&s->ref_mvs[i]);
         av_freep(&s->ref_scores[i]);

         if (!s->last_picture[i].data[0])
             continue;
         /* a reference must never share its buffer with the current picture */
         av_assert0(s->last_picture[i].data[0] != s->current_picture.data[0]);
         s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
     }

     for (p = 0; p < 3; p++) {
         for (lvl = 0; lvl < s->spatial_decomposition_count; lvl++) {
             for (ori = lvl ? 1 : 0; ori < 4; ori++)
                 av_freep(&s->plane[p].band[lvl][ori].x_coeff);
         }
     }

     if (s->mconly_picture.data[0])
         s->avctx->release_buffer(s->avctx, &s->mconly_picture);
     if (s->current_picture.data[0])
         s->avctx->release_buffer(s->avctx, &s->current_picture);
 }