libpostproc/postprocess.c
3057fa66
 /*
b78e7197
  * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
  *
  * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
  *
ef85972b
  * This file is part of FFmpeg.
b78e7197
  *
  * FFmpeg is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
  * (at your option) any later version.
  *
  * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
3057fa66
 
b304569a
 /**
ba87f080
  * @file
b304569a
  * postprocessing.
  */
115329f1
 
3057fa66
 /*
bb270c08
                         C       MMX     MMX2    3DNow   AltiVec
 isVertDC                Ec      Ec                      Ec
 isVertMinMaxOk          Ec      Ec                      Ec
 doVertLowPass           E               e       e       Ec
 doVertDefFilter         Ec      Ec      e       e       Ec
 isHorizDC               Ec      Ec                      Ec
 isHorizMinMaxOk         a       E                       Ec
 doHorizLowPass          E               e       e       Ec
 doHorizDefFilter        Ec      Ec      e       e       Ec
 do_a_deblock            Ec      E       Ec      E
 deRing                  E               e       e*      Ecp
 Vertical RKAlgo1        E               a       a
 Horizontal RKAlgo1                      a       a
 Vertical X1#            a               E       E
 Horizontal X1#          a               E       E
 LinIpolDeinterlace      e               E       E*
 CubicIpolDeinterlace    a               e       e*
 LinBlendDeinterlace     e               E       E*
 MedianDeinterlace#      E       Ec      Ec
 TempDeNoiser#           E               e       e       Ec
d5a1a995
 
2cab6401
 * I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
755bfeab
 # more or less self-invented filters, so the exactness is not too meaningful
3057fa66
 E = Exact implementation
04932b0d
 e = almost exact implementation (slightly different rounding,...)
3057fa66
 a = alternative / approximate impl
 c = checked against the other implementations (-vo md5)
b0ac780a
 p = partially optimized, still some work to do
3057fa66
 */
 
 /*
 TODO:
 reduce the time wasted on the mem transfer
 unroll stuff if instructions depend too much on the prior one
 move YScale thing to the end instead of fixing QP
13e00528
 write a faster and higher quality deblocking filter :)
d5a1a995
 make the mainloop more flexible (variable number of blocks at once
bb270c08
         (the if/else stuff per block is slowing things down)
9f45d04d
 compare the quality & speed of all filters
 split this huge file
8405b3fd
 optimize c versions
117e45b0
 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
3057fa66
 ...
13e00528
 */
 
faa6f1c3
 //Changelog: use git log
3057fa66
 
9858f773
 #include "config.h"
245976da
 #include "libavutil/avutil.h"
55a6d8d9
 #include "libavutil/avassert.h"
59074310
 #include "libavutil/intreadwrite.h"
3057fa66
 #include <inttypes.h>
 #include <stdio.h>
d5a1a995
 #include <stdlib.h>
911879d1
 #include <string.h>
e20ac54f
 //#undef HAVE_MMXEXT_INLINE
 //#define HAVE_AMD3DNOW_INLINE
 //#undef HAVE_MMX_INLINE
cc9b0679
 //#undef ARCH_X86
7f16f6e6
 //#define DEBUG_BRIGHTNESS
13e00528
 #include "postprocess.h"
c41d972d
 #include "postprocess_internal.h"
84fb4e9d
 #include "libavutil/avstring.h"
172b0e2e
 #include "libavutil/ppc/util_altivec.h"
bba9b16c
 
649c158e
 #include "libavutil/ffversion.h"
 const char postproc_ffversion[] = "FFmpeg version " FFMPEG_VERSION;
 
2a4a62bf
 unsigned postproc_version(void)
 {
55a6d8d9
     av_assert0(LIBPOSTPROC_VERSION_MICRO >= 100);
2a4a62bf
     return LIBPOSTPROC_VERSION_INT;
 }
 
41600690
 const char *postproc_configuration(void)
c1736936
 {
e528cdac
     return FFMPEG_CONFIGURATION;
c1736936
 }
 
41600690
 const char *postproc_license(void)
c1736936
 {
 #define LICENSE_PREFIX "libpostproc license: "
0cb88628
     return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
c1736936
 }
 
911879d1
 /* Size in bytes of the scratch buffer that pp_get_mode_by_name_and_quality()
  * copies the mode string into. */
 #define GET_MODE_BUFFER_SIZE 500
 /* Capacity of the per-filter array collecting unrecognized option strings
  * in pp_get_mode_by_name_and_quality(); the array is NULL terminated. */
 #define OPTIONS_ARRAY_SIZE 10
9c9e467d
 /* Number of pixels per block row/column the C filters in this file loop over. */
 #define BLOCK_SIZE 8
 /* NOTE(review): not referenced in the visible part of this file; presumably
  * used by the included postprocess templates - confirm. */
 #define TEMP_STRIDE 8
 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet
911879d1
 
e20ac54f
 /* 64-bit constants, only declared for x86 builds with inline asm.
  * wNN repeats the 16-bit value NN (hex) in each of the four words,
  * bNN repeats the byte NN (hex) in each of the eight bytes.
  * NOTE(review): the first DECLARE_ASM_CONST argument is presumably the
  * alignment; the consumers are not in this part of the file - confirm. */
 #if ARCH_X86 && HAVE_INLINE_ASM
2b858d0b
 DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
 DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
 DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
 DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
 DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
 DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
 DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
 DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
b28daef8
 #endif
3057fa66
 
2722e362
 /* Declared outside the #if above, so it exists on every architecture.
  * NOTE(review): presumably the threshold used by the dering filter in the
  * templates - confirm. */
 DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
3057fa66
 
9c9e467d
 
e034b07e
 /* Table of the filters selectable by name in a mode string.
  * pp_get_mode_by_name_and_quality() walks it until shortName is NULL and
  * matches the user supplied name against both the short and the long name.
  * NOTE(review): the initializers are positional; the columns are presumably
  * short name, long name, chromDefault, minLumQuality, minChromQuality, mask -
  * confirm against struct PPFilter in postprocess_internal.h. */
 static const struct PPFilter filters[]=
911879d1
 {
16e0bf73
     {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
     {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
 /*  {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
     {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
     {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
     {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
     {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
     {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
     {"dr", "dering",                1, 5, 6, DERING},
     {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
     {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
     {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
     {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
     {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
     {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
     {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
     {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
     {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
7f80afe0
     {"be", "bitexact",              1, 0, 0, BITEXACT},
ac682955
     {"vi", "visualize",             1, 0, 0, VISUALIZE},
16e0bf73
     {NULL, NULL,0,0,0,0} //End Marker
911879d1
 };
 
d9e2aceb
 /* Alias table, stored as consecutive (name, expansion) string pairs.
  * pp_get_mode_by_name_and_quality() compares a filter name against the
  * even entries and, on a match, splices the following odd entry into the
  * mode string. The scan stops at the first NULL in an even position, so a
  * single NULL terminates the table. */
 static const char * const replaceTable[]=
911879d1
 {
16e0bf73
     "default",      "hb:a,vb:a,dr:a",
     "de",           "hb:a,vb:a,dr:a",
     "fast",         "h1:a,v1:a,dr:a",
     "fa",           "h1:a,v1:a,dr:a",
     "ac",           "ha:a:128:7,va:a,dr:a",
     NULL //End Marker
911879d1
 };
 
04932b0d
 /* The horizontal functions exist only in C because the MMX
  * code is faster with vertical filters and transposing. */
3057fa66
 
cf5ec61d
 /**
  * Check if the given 8x8 Block is mostly "flat"
  */
e034b07e
 static inline int isHorizDC_C(const uint8_t src[], int stride, const PPContext *c)
cf5ec61d
 {
16e0bf73
     int numEq= 0;
     int y;
     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
     const int dcThreshold= dcOffset*2 + 1;
 
     for(y=0; y<BLOCK_SIZE; y++){
9f9ebe63
         numEq += ((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold;
         numEq += ((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold;
         numEq += ((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold;
         numEq += ((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold;
         numEq += ((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold;
         numEq += ((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold;
         numEq += ((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold;
16e0bf73
         src+= stride;
     }
     return numEq > c->ppMode.flatnessThreshold;
9c9e467d
 }
 
 /**
  * Check if the middle 8x8 Block in the given 8x16 block is flat
  */
e034b07e
 static inline int isVertDC_C(const uint8_t src[], int stride, const PPContext *c)
16e0bf73
 {
     int numEq= 0;
     int y;
     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
     const int dcThreshold= dcOffset*2 + 1;
 
     src+= stride*4; // src points to begin of the 8x8 Block
     for(y=0; y<BLOCK_SIZE-1; y++){
9f9ebe63
         numEq += ((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold;
         numEq += ((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold;
         numEq += ((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold;
         numEq += ((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold;
         numEq += ((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold;
         numEq += ((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold;
         numEq += ((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold;
         numEq += ((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold;
16e0bf73
         src+= stride;
     }
     return numEq > c->ppMode.flatnessThreshold;
cf5ec61d
 }
 
e034b07e
 /**
  * Sparse range check on 8 lines of 8 pixels.
  *
  * One pixel pair is sampled per line, cycling through the column pairs
  * (0,5), (2,7), (4,1), (6,3); the check fails as soon as one sampled
  * difference falls outside [-2*QP, 2*QP].
  *
  * @param src    first pixel of the first line
  * @param stride distance between two lines
  * @param QP     quantizer the allowed range is derived from
  * @return 1 if all sampled differences are within range, 0 otherwise
  */
 static inline int isHorizMinMaxOk_C(const uint8_t src[], int stride, int QP)
 {
     static const uint8_t firstCol[4]  = { 0, 2, 4, 6 };
     static const uint8_t secondCol[4] = { 5, 7, 1, 3 };
     int row;

     for (row = 0; row < 8; row++, src += stride) {
         const int k = row & 3;

         /* unsigned compare: true for values below 0 as well as above 4*QP */
         if ((unsigned)(src[firstCol[k]] - src[secondCol[k]] + 2*QP) > 4*QP)
             return 0;
     }

     return 1;
 }
cf5ec61d
 
e034b07e
 static inline int isVertMinMaxOk_C(const uint8_t src[], int stride, int QP)
cb482d25
 {
16e0bf73
     int x;
     src+= stride*4;
     for(x=0; x<BLOCK_SIZE; x+=4){
         if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
         if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
         if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
         if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
     }
     return 1;
cb482d25
 }
 
e034b07e
 static inline int horizClassify_C(const uint8_t src[], int stride, const PPContext *c)
16e0bf73
 {
     if( isHorizDC_C(src, stride, c) ){
44dabf1f
         return isHorizMinMaxOk_C(src, stride, c->QP);
16e0bf73
     }else{
         return 2;
     }
b0ac780a
 }
 
e034b07e
 static inline int vertClassify_C(const uint8_t src[], int stride, const PPContext *c)
16e0bf73
 {
     if( isVertDC_C(src, stride, c) ){
44dabf1f
         return isVertMinMaxOk_C(src, stride, c->QP);
16e0bf73
     }else{
         return 2;
     }
cf5ec61d
 }
 
e034b07e
 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, const PPContext *c)
cf5ec61d
 {
16e0bf73
     int y;
     for(y=0; y<BLOCK_SIZE; y++){
         const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
 
         if(FFABS(middleEnergy) < 8*c->QP){
             const int q=(dst[3] - dst[4])/2;
             const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
             const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
 
             int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
             d= FFMAX(d, 0);
 
             d= (5*d + 32) >> 6;
             d*= FFSIGN(-middleEnergy);
 
             if(q>0)
             {
bb8b7525
                 d = FFMAX(d, 0);
                 d = FFMIN(d, q);
16e0bf73
             }
             else
             {
bb8b7525
                 d = FFMIN(d, 0);
                 d = FFMAX(d, q);
16e0bf73
             }
 
             dst[3]-= d;
             dst[4]+= d;
bb270c08
         }
16e0bf73
         dst+= stride;
     }
cf5ec61d
 }
 
 /**
  * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
  * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
  */
e034b07e
 static inline void doHorizLowPass_C(uint8_t dst[], int stride, const PPContext *c)
cf5ec61d
 {
16e0bf73
     int y;
     for(y=0; y<BLOCK_SIZE; y++){
         const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
         const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
 
         int sums[10];
         sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
         sums[1] = sums[0] - first  + dst[3];
         sums[2] = sums[1] - first  + dst[4];
         sums[3] = sums[2] - first  + dst[5];
         sums[4] = sums[3] - first  + dst[6];
         sums[5] = sums[4] - dst[0] + dst[7];
         sums[6] = sums[5] - dst[1] + last;
         sums[7] = sums[6] - dst[2] + last;
         sums[8] = sums[7] - dst[3] + last;
         sums[9] = sums[8] - dst[4] + last;
 
         dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
         dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
         dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
         dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
         dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
         dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
         dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
         dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
 
         dst+= stride;
     }
cf5ec61d
 }
 
cc9b0679
 /**
  * Experimental Filter 1 (Horizontal)
  * will not damage linear gradients
bd107136
  * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
755bfeab
  * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
  * MMX2 version does correct clipping C version does not
cc9b0679
  * not identical with the vertical one
  */
 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
3057fa66
 {
16e0bf73
     int y;
f9d8658d
     static uint64_t lut[256];
     if(!lut[255])
16e0bf73
     {
         int i;
         for(i=0; i<256; i++)
bb270c08
         {
16e0bf73
             int v= i < 128 ? 2*i : 2*(i-256);
cc9b0679
 /*
 //Simulate 112242211 9-Tap filter
16e0bf73
             uint64_t a= (v/16)  & 0xFF;
             uint64_t b= (v/8)   & 0xFF;
             uint64_t c= (v/4)   & 0xFF;
             uint64_t d= (3*v/8) & 0xFF;
cc9b0679
 */
 //Simulate piecewise linear interpolation
16e0bf73
             uint64_t a= (v/16)   & 0xFF;
             uint64_t b= (v*3/16) & 0xFF;
             uint64_t c= (v*5/16) & 0xFF;
             uint64_t d= (7*v/16) & 0xFF;
             uint64_t A= (0x100 - a)&0xFF;
             uint64_t B= (0x100 - b)&0xFF;
             uint64_t C= (0x100 - c)&0xFF;
             uint64_t D= (0x100 - c)&0xFF;
 
             lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
                        (D<<24) | (C<<16) | (B<<8)  | (A);
             //lut[i] = (v<<32) | (v<<24);
bb270c08
         }
16e0bf73
     }
bb270c08
 
16e0bf73
     for(y=0; y<BLOCK_SIZE; y++){
         int a= src[1] - src[2];
         int b= src[3] - src[4];
         int c= src[5] - src[6];
bb270c08
 
16e0bf73
         int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
bb270c08
 
16e0bf73
         if(d < QP){
             int v = d * FFSIGN(-b);
bb270c08
 
16e0bf73
             src[1] +=v/8;
             src[2] +=v/4;
             src[3] +=3*v/8;
             src[4] -=3*v/8;
             src[5] -=v/4;
             src[6] -=v/8;
bb270c08
         }
16e0bf73
         src+=stride;
     }
3b58b885
 }
 
12eebd26
 /**
  * accurate deblock filter
  *
  * Processes 8 lines. Within a line the samples are addressed as src[k*step],
  * and src advances by stride from one line to the next, so the caller picks
  * the filter direction through step and stride. The block starts 4*step past
  * the src passed in; samples -1 .. 8 relative to that start are read,
  * samples 0 .. 7 may be written.
  *
  * Each line is first tested for flatness. Flat lines whose min/max range is
  * below 2*QP get the 9-tap low pass; non-flat lines get the default filter,
  * which only adjusts the two samples next to the block edge.
  *
  * @param src    sample 4*step before the first sample of the first line
  * @param step   distance between consecutive samples of a line
  * @param stride distance between consecutive lines
  * @param c      context providing QP, nonBQP and ppMode thresholds
  * @param mode   filter flags; only VISUALIZE is examined here
  */
e034b07e
 static av_always_inline void do_a_deblock_C(uint8_t *src, int step,
ac682955
                                             int stride, const PPContext *c, int mode)
e034b07e
 {
16e0bf73
     int y;
     const int QP= c->QP;
     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
     const int dcThreshold= dcOffset*2 + 1;
12eebd26
 //START_TIMER
16e0bf73
     src+= step*4; // src points to begin of the 8x8 Block
     for(y=0; y<8; y++){
         int numEq= 0;
 
         // count neighbouring sample pairs (-1..8) whose difference is within +-dcOffset
5ccd08d2
         numEq += ((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold;
         numEq += ((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold;
         numEq += ((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold;
         numEq += ((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold;
         numEq += ((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold;
         numEq += ((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold;
         numEq += ((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold;
         numEq += ((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold;
         numEq += ((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold;
16e0bf73
         if(numEq > c->ppMode.flatnessThreshold){
             // flat line: find min and max of samples 0..7, handled in pairs
             int min, max, x;
 
             if(src[0] > src[step]){
                 max= src[0];
                 min= src[step];
             }else{
                 max= src[step];
                 min= src[0];
             }
             for(x=2; x<8; x+=2){
                 if(src[x*step] > src[(x+1)*step]){
                         if(src[x    *step] > max) max= src[ x   *step];
                         if(src[(x+1)*step] < min) min= src[(x+1)*step];
bb270c08
                 }else{
16e0bf73
                         if(src[(x+1)*step] > max) max= src[(x+1)*step];
                         if(src[ x   *step] < min) min= src[ x   *step];
                 }
             }
             if(max-min < 2*QP){
                 // outer neighbours are used only if they are within QP of the edge sample
                 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
                 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
 
                 // running sums over a sliding window, padded with first/last
                 int sums[10];
                 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
                 sums[1] = sums[0] - first       + src[3*step];
                 sums[2] = sums[1] - first       + src[4*step];
                 sums[3] = sums[2] - first       + src[5*step];
                 sums[4] = sums[3] - first       + src[6*step];
                 sums[5] = sums[4] - src[0*step] + src[7*step];
                 sums[6] = sums[5] - src[1*step] + last;
                 sums[7] = sums[6] - src[2*step] + last;
                 sums[8] = sums[7] - src[3*step] + last;
                 sums[9] = sums[8] - src[4*step] + last;
 
ac682955
                 // NOTE: the assignments after this if still run, so these 128s
                 // only replace the 2*src[k*step] term of the results below
                 if (mode & VISUALIZE) {
                     src[0*step] =
                     src[1*step] =
                     src[2*step] =
                     src[3*step] =
                     src[4*step] =
                     src[5*step] =
                     src[6*step] =
                     src[7*step] = 128;
                 }
16e0bf73
                 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
                 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
                 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
                 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
                 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
                 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
                 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
                 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
             }
         }else{
             // non-flat line: default filter, touches only samples 3 and 4
             const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
 
             if(FFABS(middleEnergy) < 8*QP){
                 const int q=(src[3*step] - src[4*step])/2;
                 const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
                 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
 
                 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
                 d= FFMAX(d, 0);
 
                 d= (5*d + 32) >> 6;
                 d*= FFSIGN(-middleEnergy);
 
                 // clamp d between 0 and q (half the step across the edge)
                 if(q>0){
bb8b7525
                     d = FFMAX(d, 0);
                     d = FFMIN(d, q);
16e0bf73
                 }else{
bb8b7525
                     d = FFMIN(d, 0);
                     d = FFMAX(d, q);
bb270c08
                 }
 
ac682955
                 // visualization: replace a nonzero correction by a clipped
                 // offset of 32 in the opposite direction, then zero d so the
                 // regular update below has no effect
                 if ((mode & VISUALIZE) && d) {
                     d= (d < 0) ? 32 : -32;
                     src[3*step]= av_clip_uint8(src[3*step] - d);
                     src[4*step]= av_clip_uint8(src[4*step] + d);
                     d = 0;
                 }
 
16e0bf73
                 src[3*step]-= d;
                 src[4*step]+= d;
             }
bb270c08
         }
16e0bf73
 
         src += stride;
     }
12eebd26
 /*if(step==16){
     STOP_TIMER("step16")
 }else{
     STOP_TIMER("stepX")
 }*/
 }
a6be8111
 
e89952aa
 //Note: we have C, MMX, MMX2, 3DNOW, SSE2 and AltiVec versions; there is no combined 3DNOW+MMX2 one
cc9b0679
 //Plain C versions
7f80afe0
 //we always compile C for testing which needs bitexactness
375cd3f2
 #define TEMPLATE_PP_C 1
cc9b0679
 #include "postprocess_template.c"
4e4dcbc5
 
375cd3f2
 #if HAVE_ALTIVEC
 #   define TEMPLATE_PP_ALTIVEC 1
 #   include "postprocess_altivec_template.c"
 #   include "postprocess_template.c"
e89952aa
 #endif
be44a4d7
 
375cd3f2
 #if ARCH_X86 && HAVE_INLINE_ASM
 #    if CONFIG_RUNTIME_CPUDETECT
 #        define TEMPLATE_PP_MMX 1
 #        include "postprocess_template.c"
 #        define TEMPLATE_PP_MMXEXT 1
 #        include "postprocess_template.c"
 #        define TEMPLATE_PP_3DNOW 1
 #        include "postprocess_template.c"
4e264d1c
 #        define TEMPLATE_PP_SSE2 1
 #        include "postprocess_template.c"
375cd3f2
 #    else
4e264d1c
 #        if HAVE_SSE2_INLINE
 #            define TEMPLATE_PP_SSE2 1
 #            include "postprocess_template.c"
 #        elif HAVE_MMXEXT_INLINE
375cd3f2
 #            define TEMPLATE_PP_MMXEXT 1
 #            include "postprocess_template.c"
 #        elif HAVE_AMD3DNOW_INLINE
 #            define TEMPLATE_PP_3DNOW 1
 #            include "postprocess_template.c"
 #        elif HAVE_MMX_INLINE
 #            define TEMPLATE_PP_MMX 1
 #            include "postprocess_template.c"
 #        endif
 #    endif
e89952aa
 #endif
be44a4d7
 
c6945228
 /* Pointer to one of the postProcess_* implementations; postProcess()
  * selects one and calls it through a variable of this type. */
 typedef void (*pp_fn)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
2b7da70a
                       const int8_t QPs[], int QPStride, int isColor, PPContext *c2);
c6945228
 
6c51fd3f
 /**
  * Select a postProcess_* implementation and run it on one plane.
  *
  * The mode pointed to by vm is copied into the context first. The plain C
  * implementation is the default and is always used when BITEXACT is set in
  * the luma mode. Otherwise an optimized variant is chosen: at run time from
  * c->cpuCaps when CONFIG_RUNTIME_CPUDETECT is set, else at compile time
  * from the HAVE_* macros.
  *
  * All remaining arguments are passed through unchanged to the selected
  * implementation.
  */
 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
2b7da70a
         const int8_t QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
cc9b0679
 {
c6945228
     pp_fn pp = postProcess_C; // fallback if nothing better is selected below
16e0bf73
     PPContext *c= (PPContext *)vc;
     PPMode *ppMode= (PPMode *)vm;
     c->ppMode= *ppMode; //FIXME
9c9e467d
 
c6945228
     if (!(ppMode->lumMode & BITEXACT)) {
e90f5b5a
 #if CONFIG_RUNTIME_CPUDETECT
e20ac54f
 #if ARCH_X86 && HAVE_INLINE_ASM
c6945228
         // ordered per speed fastest first
4e264d1c
         if      (c->cpuCaps & AV_CPU_FLAG_SSE2)     pp = postProcess_SSE2;
         else if (c->cpuCaps & AV_CPU_FLAG_MMXEXT)   pp = postProcess_MMX2;
59d686f1
         else if (c->cpuCaps & AV_CPU_FLAG_3DNOW)    pp = postProcess_3DNow;
         else if (c->cpuCaps & AV_CPU_FLAG_MMX)      pp = postProcess_MMX;
c6945228
 #elif HAVE_ALTIVEC
59d686f1
         if      (c->cpuCaps & AV_CPU_FLAG_ALTIVEC)  pp = postProcess_altivec;
be44a4d7
 #endif
c68fafe0
 #else /* CONFIG_RUNTIME_CPUDETECT */
         // compile-time selection: the first available variant wins
4e264d1c
 #if     HAVE_SSE2_INLINE
         pp = postProcess_SSE2;
 #elif   HAVE_MMXEXT_INLINE
c6945228
         pp = postProcess_MMX2;
e20ac54f
 #elif HAVE_AMD3DNOW_INLINE
c6945228
         pp = postProcess_3DNow;
e20ac54f
 #elif HAVE_MMX_INLINE
c6945228
         pp = postProcess_MMX;
b250f9c6
 #elif HAVE_ALTIVEC
c6945228
         pp = postProcess_altivec;
e89952aa
 #endif
c68fafe0
 #endif /* !CONFIG_RUNTIME_CPUDETECT */
c6945228
     }
117e45b0
 
c6945228
     pp(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 }
13e00528
 
911879d1
 /* -pp Command line Help
  *
  * The filter names listed here must be spelled exactly as in the filters[]
  * table, because the parser matches them with strcmp(). The long names of
  * ha, va and fq were corrected to match that table, and the be and vi
  * entries of the table were added.
  * pp_get_mode_by_name_and_quality() prints this text line by line when the
  * mode name is "help".
  */
 const char pp_help[] =
 "Available postprocessing filters:\n"
 "Filters                        Options\n"
 "short  long name       short   long option     Description\n"
 "*      *               a       autoq           CPU power dependent enabler\n"
 "                       c       chrom           chrominance filtering enabled\n"
 "                       y       nochrom         chrominance filtering disabled\n"
 "                       n       noluma          luma filtering disabled\n"
 "hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
 "       1. difference factor: default=32, higher -> more deblocking\n"
 "       2. flatness threshold: default=39, lower -> more deblocking\n"
 "                       the h & v deblocking filters share these\n"
 "                       so you can't set different thresholds for h / v\n"
 "vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
 "ha     ahdeblock       (2 threshold)           horizontal deblocking filter\n"
 "va     avdeblock       (2 threshold)           vertical deblocking filter\n"
 "h1     x1hdeblock                              experimental h deblock filter 1\n"
 "v1     x1vdeblock                              experimental v deblock filter 1\n"
 "dr     dering                                  deringing filter\n"
 "al     autolevels                              automatic brightness / contrast\n"
 "                       f        fullyrange     stretch luminance to (0..255)\n"
 "lb     linblenddeint                           linear blend deinterlacer\n"
 "li     linipoldeint                            linear interpolating deinterlace\n"
 "ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
 "md     mediandeint                             median deinterlacer\n"
 "fd     ffmpegdeint                             ffmpeg deinterlacer\n"
 "l5     lowpass5                                FIR lowpass deinterlacer\n"
 "de     default                                 hb:a,vb:a,dr:a\n"
 "fa     fast                                    h1:a,v1:a,dr:a\n"
 "ac                                             ha:a:128:7,va:a,dr:a\n"
 "tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
 "                     1. <= 2. <= 3.            larger -> stronger filtering\n"
 "fq     forcequant      <quantizer>             force quantizer\n"
 "be     bitexact                                use only the bitexact C implementation\n"
 "vi     visualize                               visualize deblocking filter decisions\n"
 "Usage:\n"
 "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
 "long form example:\n"
 "vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
 "short form example:\n"
 "vb:a/hb:a/lb                                   de,-vb\n"
 "more examples:\n"
 "tn:64:128:256\n"
 "\n"
 ;
911879d1
 
7dfea342
 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
911879d1
 {
16e0bf73
     char temp[GET_MODE_BUFFER_SIZE];
     char *p= temp;
     static const char filterDelimiters[] = ",/";
8da1fff8
     static const char optionDelimiters[] = ":|";
16e0bf73
     struct PPMode *ppMode;
     char *filterToken;
 
561a365d
     if (!name)  {
         av_log(NULL, AV_LOG_ERROR, "pp: Missing argument\n");
         return NULL;
     }
 
f1808e30
     if (!strcmp(name, "help")) {
         const char *p;
         for (p = pp_help; strchr(p, '\n'); p = strchr(p, '\n') + 1) {
             av_strlcpy(temp, p, FFMIN(sizeof(temp), strchr(p, '\n') - p + 2));
             av_log(NULL, AV_LOG_INFO, "%s", temp);
         }
         return NULL;
     }
 
16e0bf73
     ppMode= av_malloc(sizeof(PPMode));
f9d24ee1
     if (!ppMode)
         return NULL;
16e0bf73
 
     ppMode->lumMode= 0;
     ppMode->chromMode= 0;
     ppMode->maxTmpNoise[0]= 700;
     ppMode->maxTmpNoise[1]= 1500;
     ppMode->maxTmpNoise[2]= 3000;
     ppMode->maxAllowedY= 234;
     ppMode->minAllowedY= 16;
     ppMode->baseDcDiff= 256/8;
     ppMode->flatnessThreshold= 56-16-1;
6f51674c
     ppMode->maxClippedThreshold= (AVRational){1,100};
16e0bf73
     ppMode->error=0;
 
84fb4e9d
     memset(temp, 0, GET_MODE_BUFFER_SIZE);
     av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
16e0bf73
 
     av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
 
     for(;;){
921caf62
         const char *filterName;
16e0bf73
         int q= 1000000; //PP_QUALITY_MAX;
         int chrom=-1;
         int luma=-1;
921caf62
         const char *option;
         const char *options[OPTIONS_ARRAY_SIZE];
16e0bf73
         int i;
         int filterNameOk=0;
         int numOfUnknownOptions=0;
         int enable=1; //does the user want us to enabled or disabled the filter
9e8be462
         char *tokstate;
16e0bf73
 
9e8be462
         filterToken= av_strtok(p, filterDelimiters, &tokstate);
fb33bff9
         if(!filterToken) break;
16e0bf73
         p+= strlen(filterToken) + 1; // p points to next filterToken
9e8be462
         filterName= av_strtok(filterToken, optionDelimiters, &tokstate);
fb33bff9
         if (!filterName) {
f6173fed
             ppMode->error++;
             break;
         }
16e0bf73
         av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
 
         if(*filterName == '-'){
             enable=0;
             filterName++;
         }
bb270c08
 
16e0bf73
         for(;;){ //for all options
9e8be462
             option= av_strtok(NULL, optionDelimiters, &tokstate);
fb33bff9
             if(!option) break;
16e0bf73
 
             av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
             if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
             else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
             else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
             else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
             else{
                 options[numOfUnknownOptions] = option;
                 numOfUnknownOptions++;
             }
             if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
         }
         options[numOfUnknownOptions] = NULL;
 
         /* replace stuff from the replace Table */
81a663f4
         for(i=0; replaceTable[2*i]; i++){
16e0bf73
             if(!strcmp(replaceTable[2*i], filterName)){
07cb6bf9
                 size_t newlen = strlen(replaceTable[2*i + 1]);
16e0bf73
                 int plen;
                 int spaceLeft;
 
c96f3750
                 p--, *p=',';
16e0bf73
 
                 plen= strlen(p);
                 spaceLeft= p - temp + plen;
84fb4e9d
                 if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE - 1){
16e0bf73
                     ppMode->error++;
                     break;
bb270c08
                 }
16e0bf73
                 memmove(p + newlen, p, plen+1);
                 memcpy(p, replaceTable[2*i + 1], newlen);
                 filterNameOk=1;
             }
         }
bb270c08
 
81a663f4
         for(i=0; filters[i].shortName; i++){
16e0bf73
             if(   !strcmp(filters[i].longName, filterName)
                || !strcmp(filters[i].shortName, filterName)){
                 ppMode->lumMode &= ~filters[i].mask;
                 ppMode->chromMode &= ~filters[i].mask;
 
                 filterNameOk=1;
                 if(!enable) break; // user wants to disable it
 
                 if(q >= filters[i].minLumQuality && luma)
                     ppMode->lumMode|= filters[i].mask;
                 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
                     if(q >= filters[i].minChromQuality)
                             ppMode->chromMode|= filters[i].mask;
 
                 if(filters[i].mask == LEVEL_FIX){
                     int o;
                     ppMode->minAllowedY= 16;
                     ppMode->maxAllowedY= 234;
81a663f4
                     for(o=0; options[o]; o++){
16e0bf73
                         if(  !strcmp(options[o],"fullyrange")
                            ||!strcmp(options[o],"f")){
                             ppMode->minAllowedY= 0;
                             ppMode->maxAllowedY= 255;
                             numOfUnknownOptions--;
bb270c08
                         }
16e0bf73
                     }
bb270c08
                 }
16e0bf73
                 else if(filters[i].mask == TEMP_NOISE_FILTER)
bb270c08
                 {
16e0bf73
                     int o;
                     int numOfNoises=0;
 
81a663f4
                     for(o=0; options[o]; o++){
16e0bf73
                         char *tail;
                         ppMode->maxTmpNoise[numOfNoises]=
                             strtol(options[o], &tail, 0);
                         if(tail!=options[o]){
                             numOfNoises++;
                             numOfUnknownOptions--;
                             if(numOfNoises >= 3) break;
bb270c08
                         }
16e0bf73
                     }
bb270c08
                 }
16e0bf73
                 else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
                      || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
                     int o;
 
81a663f4
                     for(o=0; options[o] && o<2; o++){
16e0bf73
                         char *tail;
                         int val= strtol(options[o], &tail, 0);
                         if(tail==options[o]) break;
 
                         numOfUnknownOptions--;
                         if(o==0) ppMode->baseDcDiff= val;
                         else ppMode->flatnessThreshold= val;
                     }
                 }
                 else if(filters[i].mask == FORCE_QUANT){
                     int o;
                     ppMode->forcedQuant= 15;
 
81a663f4
                     for(o=0; options[o] && o<1; o++){
16e0bf73
                         char *tail;
                         int val= strtol(options[o], &tail, 0);
                         if(tail==options[o]) break;
 
                         numOfUnknownOptions--;
                         ppMode->forcedQuant= val;
                     }
                 }
             }
bb270c08
         }
16e0bf73
         if(!filterNameOk) ppMode->error++;
         ppMode->error += numOfUnknownOptions;
     }
 
     av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
     if(ppMode->error){
         av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
         av_free(ppMode);
         return NULL;
     }
     return ppMode;
911879d1
 }
 
7dfea342
 /**
  * Release a postprocessing mode object.
  *
  * The pointer is handed unchanged to av_free().
  *
  * @param mode mode object to release
  */
 void pp_free_mode(pp_mode *mode)
 {
     av_free(mode);
 }
 
d9b141c0
 /**
  * Replace the buffer referenced by *p with a new av_mallocz() allocation.
  *
  * Despite the name this is not a realloc: the previous buffer is freed
  * first and none of its contents are carried over.
  *
  * @param p    address of the buffer pointer to replace
  * @param size number of bytes requested for the new buffer
  *
  * No failure check is done here; *p simply receives whatever av_mallocz()
  * returned.
  */
 static void reallocAlign(void **p, int size)
 {
     void *stale = *p;
 
     av_free(stale);
     *p = av_mallocz(size);
 }
 
0426af31
 /**
  * (Re)allocate all work buffers of a context for the given geometry.
  *
  * Stores stride and qpStride in the context and then replaces every scratch
  * buffer through reallocAlign(), so whatever the buffers held before is lost.
  *
  * @param c        context whose buffers are replaced
  * @param width    picture width; also used in units of 16 (rounded up)
  * @param height   picture height; also used in units of 16 (rounded up)
  * @param stride   line size used to dimension the line based temporaries
  * @param qpStride line size used to dimension the QP tables
  *
  * The function cannot report errors: after an allocation failure the
  * affected pointer inside c is left NULL.
  */
 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
     int mbWidth = (width+15)>>4;
     int mbHeight= (height+15)>>4;
     // start value of every histogram bin; evaluated in 64 bit so that
     // width*height cannot overflow int for large pictures
     const int64_t histogramInit= (int64_t)width*height/64*15/256;
     int i;
 
     c->stride= stride;
     c->qpStride= qpStride;
 
     reallocAlign((void **)&c->tempDst, stride*24+32);
     reallocAlign((void **)&c->tempSrc, stride*24);
     reallocAlign((void **)&c->tempBlocks, 2*16*8);
     reallocAlign((void **)&c->yHistogram, 256*sizeof(uint64_t));
     // reallocAlign() leaves NULL behind when the allocation failed
     if(c->yHistogram)
         for(i=0; i<256; i++)
             c->yHistogram[i]= histogramInit;
 
     for(i=0; i<3; i++){
         //Note: The +17*1024 is just there so I do not have to worry about r/w over the end.
         reallocAlign((void **)&c->tempBlurred[i], stride*mbHeight*16 + 17*1024);
         reallocAlign((void **)&c->tempBlurredPast[i], 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
     }
 
     reallocAlign((void **)&c->deintTemp, 2*width+32);
     reallocAlign((void **)&c->nonBQPTable, qpStride*mbHeight*sizeof(int8_t));
     reallocAlign((void **)&c->stdQPTable, qpStride*mbHeight*sizeof(int8_t));
     reallocAlign((void **)&c->forcedQPTable, mbWidth*sizeof(int8_t));
 }
 
e7becfb2
 /**
  * Naming callback referenced by av_codec_context_class.
  *
  * @param ptr ignored
  * @return the constant string "postproc"
  */
 static const char *context_to_name(void *ptr)
 {
     (void)ptr; // the name does not depend on the object passed in
     return "postproc";
 }
 
31bfd6f3
 /* Class descriptor that pp_get_context() stores in PPContext.av_class.
  * It is initialised positionally with the string "Postproc", the
  * context_to_name() callback and NULL.
  * NOTE(review): despite the identifier, the only use visible here is for
  * postprocessing contexts, not codec contexts. */
 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
e7becfb2
 
78cd9c56
 /**
  * Allocate and initialise a postprocessing context.
  *
  * @param width   picture width used to size the initial buffers
  * @param height  picture height used to size the initial buffers
  * @param cpuCaps bit field:
  *                - with PP_FORMAT set, bits 0-1 give the horizontal and
  *                  bits 4-5 the vertical chroma subsampling shift;
  *                  without it both shifts are 1
  *                - with PP_CPU_CAPS_AUTO set the CPU flags come from
  *                  av_get_cpu_flags(); otherwise the PP_CPU_CAPS_* bits
  *                  are translated to the matching AV_CPU_FLAG_* bits
  * @return the new context, or NULL if the context structure itself could
  *         not be allocated
  *
  * The luma stride and QP stride are only guessed from width here; the
  * buffers get reallocated later if the guess turns out too small.
  */
 av_cold pp_context *pp_get_context(int width, int height, int cpuCaps)
 {
     const int hasFormat = cpuCaps & PP_FORMAT;
     PPContext *ctx = av_mallocz(sizeof(PPContext));
 
     if (!ctx)
         return NULL;
 
     ctx->av_class = &av_codec_context_class;
 
     ctx->hChromaSubSample = hasFormat ? (cpuCaps & 0x3)        : 1;
     ctx->vChromaSubSample = hasFormat ? ((cpuCaps >> 4) & 0x3) : 1;
 
     if (cpuCaps & PP_CPU_CAPS_AUTO) {
         ctx->cpuCaps = av_get_cpu_flags();
     } else {
         int flags = 0;
 
         if (cpuCaps & PP_CPU_CAPS_MMX)
             flags |= AV_CPU_FLAG_MMX;
         if (cpuCaps & PP_CPU_CAPS_MMX2)
             flags |= AV_CPU_FLAG_MMXEXT;
         if (cpuCaps & PP_CPU_CAPS_3DNOW)
             flags |= AV_CPU_FLAG_3DNOW;
         if (cpuCaps & PP_CPU_CAPS_ALTIVEC)
             flags |= AV_CPU_FLAG_ALTIVEC;
         ctx->cpuCaps = flags;
     }
 
     /* stride and qpStride are assumptions; will realloc if needed */
     reallocBuffers(ctx, width, height,
                    FFALIGN(width, 16),
                    (width + 15) / 16 + 2);
 
     ctx->frameNum = -1;
 
     return ctx;
 }
 
78cd9c56
 av_cold void pp_free_context(void *vc){
16e0bf73
     PPContext *c = (PPContext*)vc;
     int i;
115329f1
 
859d7d46
     for(i=0; i<FF_ARRAY_ELEMS(c->tempBlurred); i++)
         av_free(c->tempBlurred[i]);
     for(i=0; i<FF_ARRAY_ELEMS(c->tempBlurredPast); i++)
         av_free(c->tempBlurredPast[i]);
115329f1
 
16e0bf73
     av_free(c->tempBlocks);
     av_free(c->yHistogram);
     av_free(c->tempDst);
     av_free(c->tempSrc);
     av_free(c->deintTemp);
     av_free(c->stdQPTable);
     av_free(c->nonBQPTable);
     av_free(c->forcedQPTable);
115329f1
 
16e0bf73
     memset(c, 0, sizeof(PPContext));
88c0bc7e
 
16e0bf73
     av_free(c);
9c9e467d
 }
 
6c51fd3f
 /**
  * Postprocess one picture.
  *
  * Plane 0 is always run through postProcess(). Planes 1 and 2 are handled
  * only when all four of src[1], src[2], dst[1], dst[2] are non-NULL: they
  * are filtered when mode->chromMode is non-zero and plainly copied otherwise.
  *
  * @param src        source plane pointers
  * @param srcStride  line sizes of the source planes
  * @param dst        destination plane pointers
  * @param dstStride  line sizes of the destination planes
  * @param width      width of plane 0
  * @param height     height of plane 0
  * @param QP_store   per-macroblock quantizer table; may be NULL, in which
  *                   case a one-row table filled with 1 is used instead
  * @param QPStride   line size of QP_store; negative values are handled
  * @param vm         mode to apply (used as PPMode)
  * @param vc         context to use (used as PPContext)
  * @param pict_type  bit field; PP_PICT_TYPE_QP2 requests halving of the QP
  *                   values, and the low three bits are compared against 3
  */
 void  pp_postprocess(const uint8_t * src[3], const int srcStride[3],
16e0bf73
                      uint8_t * dst[3], const int dstStride[3],
                      int width, int height,
2b7da70a
                      const int8_t *QP_store,  int QPStride,
7dfea342
                      pp_mode *vm,  void *vc, int pict_type)
911879d1
 {
16e0bf73
     // picture size in 16x16 units, rounded up
     int mbWidth = (width+15)>>4;
     int mbHeight= (height+15)>>4;
5a9ca68e
     PPMode *mode = vm;
     PPContext *c = vc;
16e0bf73
     int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
     int absQPStride = FFABS(QPStride);
 
     // c->stride and c->QPStride are always positive
     // grow (never shrink) the context buffers when this picture needs more
     if(c->stride < minStride || c->qpStride < absQPStride)
         reallocBuffers(c, width, height,
                        FFMAX(minStride, c->stride),
                        FFMAX(c->qpStride, absQPStride));
 
fb33bff9
     // no QP table supplied, or the mode forces a quantizer: substitute a
     // single row (stride 0, so every macroblock row reuses it)
     if(!QP_store || (mode->lumMode & FORCE_QUANT)){
16e0bf73
         int i;
         QP_store= c->forcedQPTable;
         absQPStride = QPStride = 0;
         if(mode->lumMode & FORCE_QUANT)
             for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
         else
             for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
     }
0426af31
 
16e0bf73
     // halve every QP value into c->stdQPTable and continue with that copy
     // NOTE(review): QP_store is walked forward from index 0 here even when
     // the incoming QPStride was negative - confirm this is intended
     if(pict_type & PP_PICT_TYPE_QP2){
         int i;
0b7e5d0d
         const int count= FFMAX(mbHeight * absQPStride, mbWidth);
16e0bf73
         // four entries per iteration; the mask clears the bit that the
         // 32 bit shift moved across each byte boundary
         for(i=0; i<(count>>2); i++){
2aa21eec
             AV_WN32(c->stdQPTable + (i<<2), AV_RN32(QP_store + (i<<2)) >> 1 & 0x7F7F7F7F);
bb270c08
         }
16e0bf73
         // remaining (count % 4) entries one by one
         for(i<<=2; i<count; i++){
             c->stdQPTable[i] = QP_store[i]>>1;
         }
         QP_store= c->stdQPTable;
         QPStride= absQPStride;
     }
 
     // disabled debug dump of the QP table
     if(0){
         int x,y;
         for(y=0; y<mbHeight; y++){
             for(x=0; x<mbWidth; x++){
e7becfb2
                 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
16e0bf73
             }
             av_log(c, AV_LOG_INFO, "\n");
bb270c08
         }
e7becfb2
         av_log(c, AV_LOG_INFO, "\n");
16e0bf73
     }
 
     // unless the low bits of pict_type equal 3, save the low 6 bits of every
     // QP value in c->nonBQPTable
     // NOTE(review): 3 presumably denotes a B picture, going by the table
     // name - confirm against the callers
     if((pict_type&7)!=3){
         if (QPStride >= 0){
             int i;
0b7e5d0d
             const int count= FFMAX(mbHeight * QPStride, mbWidth);
16e0bf73
             // same four-at-a-time scheme as above, then the tail
             for(i=0; i<(count>>2); i++){
59074310
                 AV_WN32(c->nonBQPTable + (i<<2), AV_RN32(QP_store + (i<<2)) & 0x3F3F3F3F);
16e0bf73
             }
             for(i<<=2; i<count; i++){
                 c->nonBQPTable[i] = QP_store[i] & 0x3F;
             }
         } else {
             // negative stride: source rows are addressed with QPStride, the
             // copy is stored with the positive stride
             int i,j;
             for(i=0; i<mbHeight; i++) {
                 for(j=0; j<absQPStride; j++) {
                     c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
bb270c08
                 }
16e0bf73
             }
bb270c08
         }
16e0bf73
     }
bb270c08
 
16e0bf73
     av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
            mode->lumMode, mode->chromMode);
bb270c08
 
16e0bf73
     // plane 0
     postProcess(src[0], srcStride[0], dst[0], dstStride[0],
bb270c08
                 width, height, QP_store, QPStride, 0, mode, c);
 
7a113333
     // nothing more to do when any of the remaining planes is missing
     if (!(src[1] && src[2] && dst[1] && dst[2]))
         return;
 
16e0bf73
     // planes 1 and 2 use the subsampled dimensions
     width  = (width )>>c->hChromaSubSample;
     height = (height)>>c->vChromaSubSample;
 
     if(mode->chromMode){
         postProcess(src[1], srcStride[1], dst[1], dstStride[1],
                     width, height, QP_store, QPStride, 1, mode, c);
         postProcess(src[2], srcStride[2], dst[2], dstStride[2],
                     width, height, QP_store, QPStride, 2, mode, c);
     }
     // no filtering requested for these planes: copy them, via linecpy()
     // when source and destination strides match ...
     else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
         linecpy(dst[1], src[1], height, srcStride[1]);
         linecpy(dst[2], src[2], height, srcStride[2]);
     }else{
         // ... and row by row otherwise
         int y;
         for(y=0; y<height; y++){
             memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
             memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
bb270c08
         }
16e0bf73
     }
911879d1
 }