GitList

libavcodec/faandct.c

/*
 * Floating point AAN DCT
 * this implementation is based upon the IJG integer AAN DCT (see jfdctfst.c)
 *
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 * Copyright (c) 2003 Roman Shaposhnik
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/**
 * @file
 * @brief
 *     Floating point AAN DCT
 * @author Michael Niedermayer <michaelni@gmx.at>
 */

#include "dsputil.h"
#include "faandct.h"
#include "libavutil/internal.h"
#include "libavutil/libm.h"
/* Scalar type used for all intermediate DCT arithmetic in this file. */
#define FLOAT float

/*
 * B0..B7: per-index scale factors used to build the postscale table.
 * Numbers generated by simple C code (not as accurate as they could be):
 *
 *   for(i=0; i<8; i++){
 *       printf("#define B%d %1.20llf\n", i,
 *              (long double)1.0/(cosl(i*acosl(-1.0)/(long double)16.0)*sqrtl(2)));
 *   }
 */
#define B0 1.00000000000000000000
#define B1 0.72095982200694791383 // (cos(pi*1/16)*sqrt(2))^-1
#define B2 0.76536686473017954350 // (cos(pi*2/16)*sqrt(2))^-1
#define B3 0.85043009476725644878 // (cos(pi*3/16)*sqrt(2))^-1
#define B4 1.00000000000000000000 // (cos(pi*4/16)*sqrt(2))^-1
#define B5 1.27275858057283393842 // (cos(pi*5/16)*sqrt(2))^-1
#define B6 1.84775906502257351242 // (cos(pi*6/16)*sqrt(2))^-1
#define B7 3.62450978541155137218 // (cos(pi*7/16)*sqrt(2))^-1

/* Rotation constants used inside the 8-point butterflies. */
#define A1 0.70710678118654752438 // cos(pi*4/16)
#define A2 0.54119610014619698435 // cos(pi*6/16)*sqrt(2)
#define A5 0.38268343236508977170 // cos(pi*6/16)
#define A4 1.30656296487637652774 // cos(pi*2/16)*sqrt(2)

/*
 * Post-scale factors for the 8x8 result: entry [8*r + c] is Br * Bc.
 * The column passes multiply each unscaled coefficient by the matching
 * entry just before rounding it to an integer.
 */
static const FLOAT postscale[64]={
B0*B0, B0*B1, B0*B2, B0*B3, B0*B4, B0*B5, B0*B6, B0*B7,
B1*B0, B1*B1, B1*B2, B1*B3, B1*B4, B1*B5, B1*B6, B1*B7,
B2*B0, B2*B1, B2*B2, B2*B3, B2*B4, B2*B5, B2*B6, B2*B7,
B3*B0, B3*B1, B3*B2, B3*B3, B3*B4, B3*B5, B3*B6, B3*B7,
B4*B0, B4*B1, B4*B2, B4*B3, B4*B4, B4*B5, B4*B6, B4*B7,
B5*B0, B5*B1, B5*B2, B5*B3, B5*B4, B5*B5, B5*B6, B5*B7,
B6*B0, B6*B1, B6*B2, B6*B3, B6*B4, B6*B5, B6*B6, B6*B7,
B7*B0, B7*B1, B7*B2, B7*B3, B7*B4, B7*B5, B7*B6, B7*B7,
};
849f1035	static av_always_inline void row_fdct(FLOAT temp[64], DCTELEM * data)
65e4c8c9	{ FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; FLOAT tmp10, tmp11, tmp12, tmp13;
8d06bd53	FLOAT z2, z4, z11, z13; FLOAT av_unused z5;
65e4c8c9	int i; for (i=0; i<8*8; i+=8) { tmp0= data[0 + i] + data[7 + i]; tmp7= data[0 + i] - data[7 + i]; tmp1= data[1 + i] + data[6 + i]; tmp6= data[1 + i] - data[6 + i]; tmp2= data[2 + i] + data[5 + i]; tmp5= data[2 + i] - data[5 + i]; tmp3= data[3 + i] + data[4 + i]; tmp4= data[3 + i] - data[4 + i];
115329f1
65e4c8c9	tmp10= tmp0 + tmp3; tmp13= tmp0 - tmp3; tmp11= tmp1 + tmp2; tmp12= tmp1 - tmp2;
115329f1
65e4c8c9	temp[0 + i]= tmp10 + tmp11; temp[4 + i]= tmp10 - tmp11;
115329f1
e6d7ef64	tmp12 += tmp13; tmp12 *= A1; temp[2 + i]= tmp13 + tmp12; temp[6 + i]= tmp13 - tmp12;
115329f1
92ac614f	tmp4 += tmp5; tmp5 += tmp6; tmp6 += tmp7;
65e4c8c9
c3a3d344	#if 0
92ac614f	z5= (tmp4 - tmp6) * A5; z2= tmp4A2 + z5; z4= tmp6A4 + z5;
c3a3d344	#else z2= tmp4(A2+A5) - tmp6A5; z4= tmp6(A4-A5) + tmp4A5; #endif
8af4d0fc	tmp5*=A1;
65e4c8c9
8af4d0fc	z11= tmp7 + tmp5; z13= tmp7 - tmp5;
65e4c8c9	temp[5 + i]= z13 + z2; temp[3 + i]= z13 - z2; temp[1 + i]= z11 + z4; temp[7 + i]= z11 - z4;
115329f1	}
d43fb4e8	} void ff_faandct(DCTELEM * data) { FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; FLOAT tmp10, tmp11, tmp12, tmp13;
8d06bd53	FLOAT z2, z4, z11, z13; FLOAT av_unused z5;
d43fb4e8	FLOAT temp[64]; int i; emms_c(); row_fdct(temp, data);
65e4c8c9	for (i=0; i<8; i++) { tmp0= temp[80 + i] + temp[87 + i]; tmp7= temp[80 + i] - temp[87 + i]; tmp1= temp[81 + i] + temp[86 + i]; tmp6= temp[81 + i] - temp[86 + i]; tmp2= temp[82 + i] + temp[85 + i]; tmp5= temp[82 + i] - temp[85 + i]; tmp3= temp[83 + i] + temp[84 + i]; tmp4= temp[83 + i] - temp[84 + i];
115329f1
65e4c8c9	tmp10= tmp0 + tmp3; tmp13= tmp0 - tmp3; tmp11= tmp1 + tmp2; tmp12= tmp1 - tmp2;
115329f1
856c8e0a	data[80 + i]= lrintf(postscale[80 + i] * (tmp10 + tmp11)); data[84 + i]= lrintf(postscale[84 + i] * (tmp10 - tmp11));
115329f1
e6d7ef64	tmp12 += tmp13; tmp12 *= A1;
856c8e0a	data[82 + i]= lrintf(postscale[82 + i] * (tmp13 + tmp12)); data[86 + i]= lrintf(postscale[86 + i] * (tmp13 - tmp12));
115329f1
92ac614f	tmp4 += tmp5; tmp5 += tmp6; tmp6 += tmp7;
65e4c8c9
c3a3d344	#if 0
92ac614f	z5= (tmp4 - tmp6) * A5; z2= tmp4A2 + z5; z4= tmp6A4 + z5;
c3a3d344	#else z2= tmp4(A2+A5) - tmp6A5; z4= tmp6(A4-A5) + tmp4A5; #endif
8af4d0fc	tmp5*=A1;
65e4c8c9
8af4d0fc	z11= tmp7 + tmp5; z13= tmp7 - tmp5;
65e4c8c9
856c8e0a	data[85 + i]= lrintf(postscale[85 + i] * (z13 + z2)); data[83 + i]= lrintf(postscale[83 + i] * (z13 - z2)); data[81 + i]= lrintf(postscale[81 + i] * (z11 + z4)); data[87 + i]= lrintf(postscale[87 + i] * (z11 - z4));
65e4c8c9	} }
48b1f800	void ff_faandct248(DCTELEM * data) { FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; FLOAT tmp10, tmp11, tmp12, tmp13; FLOAT temp[64]; int i; emms_c();
d43fb4e8	row_fdct(temp, data);
48b1f800	for (i=0; i<8; i++) { tmp0 = temp[80 + i] + temp[81 + i]; tmp1 = temp[82 + i] + temp[83 + i]; tmp2 = temp[84 + i] + temp[85 + i]; tmp3 = temp[86 + i] + temp[87 + i]; tmp4 = temp[80 + i] - temp[81 + i]; tmp5 = temp[82 + i] - temp[83 + i]; tmp6 = temp[84 + i] - temp[85 + i]; tmp7 = temp[86 + i] - temp[87 + i];
115329f1
48b1f800	tmp10 = tmp0 + tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; tmp13 = tmp0 - tmp3;
115329f1
856c8e0a	data[80 + i] = lrintf(postscale[80 + i] * (tmp10 + tmp11)); data[84 + i] = lrintf(postscale[84 + i] * (tmp10 - tmp11));
115329f1
e6d7ef64	tmp12 += tmp13; tmp12 *= A1;
856c8e0a	data[82 + i] = lrintf(postscale[82 + i] * (tmp13 + tmp12)); data[86 + i] = lrintf(postscale[86 + i] * (tmp13 - tmp12));
115329f1
48b1f800	tmp10 = tmp4 + tmp7;
bb270c08	tmp11 = tmp5 + tmp6; tmp12 = tmp5 - tmp6; tmp13 = tmp4 - tmp7;
48b1f800
856c8e0a	data[81 + i] = lrintf(postscale[80 + i] * (tmp10 + tmp11)); data[85 + i] = lrintf(postscale[84 + i] * (tmp10 - tmp11));
48b1f800
e6d7ef64	tmp12 += tmp13; tmp12 *= A1;
856c8e0a	data[83 + i] = lrintf(postscale[82 + i] * (tmp13 + tmp12)); data[87 + i] = lrintf(postscale[86 + i] * (tmp13 - tmp12));
48b1f800	} }