* commit 'a9aee08d900f686e966c64afec5d88a7d9d130a3':
dsputil: Split off FDCT bits into their own context
Conflicts:
configure
libavcodec/Makefile
libavcodec/asvenc.c
libavcodec/dnxhdenc.c
libavcodec/dsputil.c
libavcodec/mpegvideo.h
libavcodec/mpegvideo_enc.c
libavcodec/x86/Makefile
libavcodec/x86/dsputilenc_mmx.c
Merged-by: Michael Niedermayer <michaelni@gmx.at>
... | ... |
@@ -1802,6 +1802,7 @@ CONFIG_EXTRA=" |
1802 | 1802 |
cabac |
1803 | 1803 |
dsputil |
1804 | 1804 |
exif |
1805 |
+ fdctdsp |
|
1805 | 1806 |
frame_thread_encoder |
1806 | 1807 |
gcrypt |
1807 | 1808 |
golomb |
... | ... |
@@ -1995,7 +1996,7 @@ threads_if_any="$THREADS_LIST" |
1995 | 1995 |
|
1996 | 1996 |
# subsystems |
1997 | 1997 |
dct_select="rdft" |
1998 |
-dsputil_select="idctdsp" |
|
1998 |
+dsputil_select="fdctdsp idctdsp" |
|
1999 | 1999 |
error_resilience_select="dsputil" |
2000 | 2000 |
frame_thread_encoder_deps="encoders threads" |
2001 | 2001 |
intrax8_select="error_resilience" |
... | ... |
@@ -2024,9 +2025,9 @@ amv_decoder_select="sp5x_decoder exif" |
2024 | 2024 |
amv_encoder_select="aandcttables mpegvideoenc" |
2025 | 2025 |
ape_decoder_select="bswapdsp llauddsp" |
2026 | 2026 |
asv1_decoder_select="blockdsp bswapdsp idctdsp" |
2027 |
-asv1_encoder_select="bswapdsp dsputil" |
|
2027 |
+asv1_encoder_select="bswapdsp dsputil fdctdsp" |
|
2028 | 2028 |
asv2_decoder_select="blockdsp bswapdsp idctdsp" |
2029 |
-asv2_encoder_select="bswapdsp dsputil" |
|
2029 |
+asv2_encoder_select="bswapdsp dsputil fdctdsp" |
|
2030 | 2030 |
atrac1_decoder_select="mdct sinewin" |
2031 | 2031 |
atrac3_decoder_select="mdct" |
2032 | 2032 |
atrac3p_decoder_select="mdct sinewin" |
... | ... |
@@ -2043,9 +2044,9 @@ cscd_decoder_suggest="zlib" |
2043 | 2043 |
dca_decoder_select="mdct" |
2044 | 2044 |
dirac_decoder_select="dsputil dwt golomb videodsp" |
2045 | 2045 |
dnxhd_decoder_select="blockdsp idctdsp" |
2046 |
-dnxhd_encoder_select="aandcttables blockdsp dsputil idctdsp mpegvideoenc" |
|
2046 |
+dnxhd_encoder_select="aandcttables blockdsp dsputil fdctdsp idctdsp mpegvideoenc" |
|
2047 | 2047 |
dvvideo_decoder_select="idctdsp" |
2048 |
-dvvideo_encoder_select="dsputil" |
|
2048 |
+dvvideo_encoder_select="dsputil fdctdsp" |
|
2049 | 2049 |
dxa_decoder_select="zlib" |
2050 | 2050 |
eac3_decoder_select="ac3_decoder" |
2051 | 2051 |
eac3_encoder_select="ac3_encoder" |
... | ... |
@@ -2134,7 +2135,7 @@ opus_decoder_deps="swresample" |
2134 | 2134 |
png_decoder_select="zlib" |
2135 | 2135 |
png_encoder_select="huffyuvencdsp zlib" |
2136 | 2136 |
prores_decoder_select="blockdsp idctdsp" |
2137 |
-prores_encoder_select="dsputil" |
|
2137 |
+prores_encoder_select="fdctdsp" |
|
2138 | 2138 |
qcelp_decoder_select="lsp" |
2139 | 2139 |
qdm2_decoder_select="mdct rdft mpegaudiodsp" |
2140 | 2140 |
ra_144_encoder_select="audio_frame_queue lpc audiodsp" |
... | ... |
@@ -41,9 +41,10 @@ OBJS-$(CONFIG_CRYSTALHD) += crystalhd.o |
41 | 41 |
OBJS-$(CONFIG_DCT) += dct.o dct32_fixed.o dct32_float.o |
42 | 42 |
OBJS-$(CONFIG_DSPUTIL) += dsputil.o |
43 | 43 |
OBJS-$(CONFIG_DXVA2) += dxva2.o |
44 |
-OBJS-$(CONFIG_ENCODERS) += faandct.o jfdctfst.o jfdctint.o |
|
45 | 44 |
OBJS-$(CONFIG_ERROR_RESILIENCE) += error_resilience.o |
46 | 45 |
OBJS-$(CONFIG_EXIF) += exif.o tiff_common.o |
46 |
+OBJS-$(CONFIG_FDCTDSP) += fdctdsp.o faandct.o \ |
|
47 |
+ jfdctfst.o jfdctint.o |
|
47 | 48 |
FFT-OBJS-$(CONFIG_HARDCODED_TABLES) += cos_tables.o cos_fixed_tables.o |
48 | 49 |
OBJS-$(CONFIG_FFT) += avfft.o fft_fixed.o fft_float.o \ |
49 | 50 |
fft_fixed_32.o fft_init_table.o \ |
... | ... |
@@ -34,6 +34,7 @@ |
34 | 34 |
#include "blockdsp.h" |
35 | 35 |
#include "bswapdsp.h" |
36 | 36 |
#include "dsputil.h" |
37 |
+#include "fdctdsp.h" |
|
37 | 38 |
#include "idctdsp.h" |
38 | 39 |
#include "get_bits.h" |
39 | 40 |
#include "put_bits.h" |
... | ... |
@@ -43,6 +44,7 @@ typedef struct ASV1Context{ |
43 | 43 |
BlockDSPContext bdsp; |
44 | 44 |
BswapDSPContext bbdsp; |
45 | 45 |
DSPContext dsp; |
46 |
+ FDCTDSPContext fdsp; |
|
46 | 47 |
IDCTDSPContext idsp; |
47 | 48 |
PutBitContext pb; |
48 | 49 |
GetBitContext gb; |
... | ... |
@@ -28,6 +28,7 @@ |
28 | 28 |
|
29 | 29 |
#include "asv.h" |
30 | 30 |
#include "avcodec.h" |
31 |
+#include "fdctdsp.h" |
|
31 | 32 |
#include "internal.h" |
32 | 33 |
#include "mathops.h" |
33 | 34 |
#include "mpeg12data.h" |
... | ... |
@@ -164,13 +165,13 @@ static inline void dct_get(ASV1Context *a, const AVFrame *frame, |
164 | 164 |
a->dsp.get_pixels(block[2], ptr_y + 8*linesize , linesize); |
165 | 165 |
a->dsp.get_pixels(block[3], ptr_y + 8*linesize + 8, linesize); |
166 | 166 |
for(i=0; i<4; i++) |
167 |
- a->dsp.fdct(block[i]); |
|
167 |
+ a->fdsp.fdct(block[i]); |
|
168 | 168 |
|
169 | 169 |
if(!(a->avctx->flags&CODEC_FLAG_GRAY)){ |
170 | 170 |
a->dsp.get_pixels(block[4], ptr_cb, frame->linesize[1]); |
171 | 171 |
a->dsp.get_pixels(block[5], ptr_cr, frame->linesize[2]); |
172 | 172 |
for(i=4; i<6; i++) |
173 |
- a->dsp.fdct(block[i]); |
|
173 |
+ a->fdsp.fdct(block[i]); |
|
174 | 174 |
} |
175 | 175 |
} |
176 | 176 |
|
... | ... |
@@ -282,6 +283,7 @@ static av_cold int encode_init(AVCodecContext *avctx){ |
282 | 282 |
|
283 | 283 |
ff_asv_common_init(avctx); |
284 | 284 |
ff_dsputil_init(&a->dsp, avctx); |
285 |
+ ff_fdctdsp_init(&a->fdsp, avctx); |
|
285 | 286 |
|
286 | 287 |
if(avctx->global_quality <= 0) avctx->global_quality= 4*FF_QUALITY_SCALE; |
287 | 288 |
|
... | ... |
@@ -31,6 +31,7 @@ |
31 | 31 |
#include "avcodec.h" |
32 | 32 |
#include "blockdsp.h" |
33 | 33 |
#include "dsputil.h" |
34 |
+#include "fdctdsp.h" |
|
34 | 35 |
#include "internal.h" |
35 | 36 |
#include "mpegvideo.h" |
36 | 37 |
#include "dnxhdenc.h" |
... | ... |
@@ -109,7 +110,7 @@ static int dnxhd_10bit_dct_quantize(MpegEncContext *ctx, int16_t *block, |
109 | 109 |
int last_non_zero = 0; |
110 | 110 |
int i; |
111 | 111 |
|
112 |
- ctx->dsp.fdct(block); |
|
112 |
+ ctx->fdsp.fdct(block); |
|
113 | 113 |
|
114 | 114 |
// Divide by 4 with rounding, to compensate scaling of DCT coefficients |
115 | 115 |
block[0] = (block[0] + 2) >> 2; |
... | ... |
@@ -322,6 +323,7 @@ static av_cold int dnxhd_encode_init(AVCodecContext *avctx) |
322 | 322 |
avctx->bits_per_raw_sample = ctx->cid_table->bit_depth; |
323 | 323 |
|
324 | 324 |
ff_blockdsp_init(&ctx->bdsp, avctx); |
325 |
+ ff_fdctdsp_init(&ctx->m.fdsp, avctx); |
|
325 | 326 |
ff_idctdsp_init(&ctx->m.idsp, avctx); |
326 | 327 |
ff_mpegvideoencdsp_init(&ctx->m.mpvencdsp, avctx); |
327 | 328 |
ff_dct_common_init(&ctx->m); |
... | ... |
@@ -29,10 +29,8 @@ |
29 | 29 |
#include "libavutil/internal.h" |
30 | 30 |
#include "avcodec.h" |
31 | 31 |
#include "copy_block.h" |
32 |
-#include "dct.h" |
|
33 | 32 |
#include "dsputil.h" |
34 | 33 |
#include "simple_idct.h" |
35 |
-#include "faandct.h" |
|
36 | 34 |
#include "mpegvideo.h" |
37 | 35 |
#include "config.h" |
38 | 36 |
|
... | ... |
@@ -589,7 +587,7 @@ static int dct_sad8x8_c(MpegEncContext *s, uint8_t *src1, |
589 | 589 |
av_assert2(h == 8); |
590 | 590 |
|
591 | 591 |
s->dsp.diff_pixels(temp, src1, src2, stride); |
592 |
- s->dsp.fdct(temp); |
|
592 |
+ s->fdsp.fdct(temp); |
|
593 | 593 |
return s->dsp.sum_abs_dctelem(temp); |
594 | 594 |
} |
595 | 595 |
|
... | ... |
@@ -656,7 +654,7 @@ static int dct_max8x8_c(MpegEncContext *s, uint8_t *src1, |
656 | 656 |
av_assert2(h == 8); |
657 | 657 |
|
658 | 658 |
s->dsp.diff_pixels(temp, src1, src2, stride); |
659 |
- s->dsp.fdct(temp); |
|
659 |
+ s->fdsp.fdct(temp); |
|
660 | 660 |
|
661 | 661 |
for (i = 0; i < 64; i++) |
662 | 662 |
sum = FFMAX(sum, FFABS(temp[i])); |
... | ... |
@@ -973,24 +971,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) |
973 | 973 |
|
974 | 974 |
ff_check_alignment(); |
975 | 975 |
|
976 |
-#if CONFIG_ENCODERS |
|
977 |
- if (avctx->bits_per_raw_sample == 10) { |
|
978 |
- c->fdct = ff_jpeg_fdct_islow_10; |
|
979 |
- c->fdct248 = ff_fdct248_islow_10; |
|
980 |
- } else { |
|
981 |
- if (avctx->dct_algo == FF_DCT_FASTINT) { |
|
982 |
- c->fdct = ff_fdct_ifast; |
|
983 |
- c->fdct248 = ff_fdct_ifast248; |
|
984 |
- } else if (avctx->dct_algo == FF_DCT_FAAN) { |
|
985 |
- c->fdct = ff_faandct; |
|
986 |
- c->fdct248 = ff_faandct248; |
|
987 |
- } else { |
|
988 |
- c->fdct = ff_jpeg_fdct_islow_8; // slow/accurate/default |
|
989 |
- c->fdct248 = ff_fdct248_islow_8; |
|
990 |
- } |
|
991 |
- } |
|
992 |
-#endif /* CONFIG_ENCODERS */ |
|
993 |
- |
|
994 | 976 |
c->diff_pixels = diff_pixels_c; |
995 | 977 |
|
996 | 978 |
c->sum_abs_dctelem = sum_abs_dctelem_c; |
... | ... |
@@ -95,10 +95,6 @@ typedef struct DSPContext { |
95 | 95 |
me_cmp_func frame_skip_cmp[6]; // only width 8 used |
96 | 96 |
|
97 | 97 |
me_cmp_func pix_abs[2][4]; |
98 |
- |
|
99 |
- /* (I)DCT */ |
|
100 |
- void (*fdct)(int16_t *block /* align 16 */); |
|
101 |
- void (*fdct248)(int16_t *block /* align 16 */); |
|
102 | 98 |
} DSPContext; |
103 | 99 |
|
104 | 100 |
void ff_dsputil_static_init(void); |
... | ... |
@@ -29,6 +29,7 @@ |
29 | 29 |
#include "config.h" |
30 | 30 |
#include "avcodec.h" |
31 | 31 |
#include "dsputil.h" |
32 |
+#include "fdctdsp.h" |
|
32 | 33 |
#include "internal.h" |
33 | 34 |
#include "put_bits.h" |
34 | 35 |
#include "dv.h" |
... | ... |
@@ -38,6 +39,7 @@ static av_cold int dvvideo_encode_init(AVCodecContext *avctx) |
38 | 38 |
{ |
39 | 39 |
DVVideoContext *s = avctx->priv_data; |
40 | 40 |
DSPContext dsp; |
41 |
+ FDCTDSPContext fdsp; |
|
41 | 42 |
int ret; |
42 | 43 |
|
43 | 44 |
s->sys = avpriv_dv_codec_profile(avctx); |
... | ... |
@@ -66,13 +68,14 @@ static av_cold int dvvideo_encode_init(AVCodecContext *avctx) |
66 | 66 |
|
67 | 67 |
memset(&dsp,0, sizeof(dsp)); |
68 | 68 |
ff_dsputil_init(&dsp, avctx); |
69 |
+ ff_fdctdsp_init(&fdsp, avctx); |
|
69 | 70 |
ff_set_cmp(&dsp, dsp.ildct_cmp, avctx->ildct_cmp); |
70 | 71 |
|
71 | 72 |
s->get_pixels = dsp.get_pixels; |
72 | 73 |
s->ildct_cmp = dsp.ildct_cmp[5]; |
73 | 74 |
|
74 |
- s->fdct[0] = dsp.fdct; |
|
75 |
- s->fdct[1] = dsp.fdct248; |
|
75 |
+ s->fdct[0] = fdsp.fdct; |
|
76 |
+ s->fdct[1] = fdsp.fdct248; |
|
76 | 77 |
|
77 | 78 |
return ff_dvvideo_init(avctx); |
78 | 79 |
} |
79 | 80 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,50 @@ |
0 |
+/* |
|
1 |
+ * This file is part of FFmpeg. |
|
2 |
+ * |
|
3 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
4 |
+ * modify it under the terms of the GNU Lesser General Public |
|
5 |
+ * License as published by the Free Software Foundation; either |
|
6 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
7 |
+ * |
|
8 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
9 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
10 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
11 |
+ * Lesser General Public License for more details. |
|
12 |
+ * |
|
13 |
+ * You should have received a copy of the GNU Lesser General Public |
|
14 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
15 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
16 |
+ */ |
|
17 |
+ |
|
18 |
+#include "libavutil/attributes.h" |
|
19 |
+#include "avcodec.h" |
|
20 |
+#include "dct.h" |
|
21 |
+#include "faandct.h" |
|
22 |
+#include "fdctdsp.h" |
|
23 |
+#include "config.h" |
|
24 |
+ |
|
25 |
+av_cold void ff_fdctdsp_init(FDCTDSPContext *c, AVCodecContext *avctx) |
|
26 |
+{ |
|
27 |
+ const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8; |
|
28 |
+ |
|
29 |
+ if (avctx->bits_per_raw_sample == 10) { |
|
30 |
+ c->fdct = ff_jpeg_fdct_islow_10; |
|
31 |
+ c->fdct248 = ff_fdct248_islow_10; |
|
32 |
+ } else { |
|
33 |
+ if (avctx->dct_algo == FF_DCT_FASTINT) { |
|
34 |
+ c->fdct = ff_fdct_ifast; |
|
35 |
+ c->fdct248 = ff_fdct_ifast248; |
|
36 |
+ } else if (avctx->dct_algo == FF_DCT_FAAN) { |
|
37 |
+ c->fdct = ff_faandct; |
|
38 |
+ c->fdct248 = ff_faandct248; |
|
39 |
+ } else { |
|
40 |
+ c->fdct = ff_jpeg_fdct_islow_8; // slow/accurate/default |
|
41 |
+ c->fdct248 = ff_fdct248_islow_8; |
|
42 |
+ } |
|
43 |
+ } |
|
44 |
+ |
|
45 |
+ if (ARCH_PPC) |
|
46 |
+ ff_fdctdsp_init_ppc(c, avctx, high_bit_depth); |
|
47 |
+ if (ARCH_X86) |
|
48 |
+ ff_fdctdsp_init_x86(c, avctx, high_bit_depth); |
|
49 |
+} |
0 | 50 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,37 @@ |
0 |
+/* |
|
1 |
+ * This file is part of FFmpeg. |
|
2 |
+ * |
|
3 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
4 |
+ * modify it under the terms of the GNU Lesser General Public |
|
5 |
+ * License as published by the Free Software Foundation; either |
|
6 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
7 |
+ * |
|
8 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
9 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
10 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
11 |
+ * Lesser General Public License for more details. |
|
12 |
+ * |
|
13 |
+ * You should have received a copy of the GNU Lesser General Public |
|
14 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
15 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
16 |
+ */ |
|
17 |
+ |
|
18 |
+#ifndef AVCODEC_FDCTDSP_H |
|
19 |
+#define AVCODEC_FDCTDSP_H |
|
20 |
+ |
|
21 |
+#include <stdint.h> |
|
22 |
+ |
|
23 |
+#include "avcodec.h" |
|
24 |
+ |
|
25 |
+typedef struct FDCTDSPContext { |
|
26 |
+ void (*fdct)(int16_t *block /* align 16 */); |
|
27 |
+ void (*fdct248)(int16_t *block /* align 16 */); |
|
28 |
+} FDCTDSPContext; |
|
29 |
+ |
|
30 |
+void ff_fdctdsp_init(FDCTDSPContext *c, AVCodecContext *avctx); |
|
31 |
+void ff_fdctdsp_init_ppc(FDCTDSPContext *c, AVCodecContext *avctx, |
|
32 |
+ unsigned high_bit_depth); |
|
33 |
+void ff_fdctdsp_init_x86(FDCTDSPContext *c, AVCodecContext *avctx, |
|
34 |
+ unsigned high_bit_depth); |
|
35 |
+ |
|
36 |
+#endif /* AVCODEC_FDCTDSP_H */ |
... | ... |
@@ -32,6 +32,7 @@ |
32 | 32 |
#include "blockdsp.h" |
33 | 33 |
#include "dsputil.h" |
34 | 34 |
#include "error_resilience.h" |
35 |
+#include "fdctdsp.h" |
|
35 | 36 |
#include "get_bits.h" |
36 | 37 |
#include "h264chroma.h" |
37 | 38 |
#include "h263dsp.h" |
... | ... |
@@ -364,6 +365,7 @@ typedef struct MpegEncContext { |
364 | 364 |
|
365 | 365 |
BlockDSPContext bdsp; |
366 | 366 |
DSPContext dsp; ///< pointers for accelerated dsp functions |
367 |
+ FDCTDSPContext fdsp; |
|
367 | 368 |
H264ChromaContext h264chroma; |
368 | 369 |
HpelDSPContext hdsp; |
369 | 370 |
IDCTDSPContext idsp; |
... | ... |
@@ -78,15 +78,15 @@ void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64], |
78 | 78 |
const uint16_t *quant_matrix, |
79 | 79 |
int bias, int qmin, int qmax, int intra) |
80 | 80 |
{ |
81 |
- DSPContext *dsp = &s->dsp; |
|
81 |
+ FDCTDSPContext *fdsp = &s->fdsp; |
|
82 | 82 |
int qscale; |
83 | 83 |
int shift = 0; |
84 | 84 |
|
85 | 85 |
for (qscale = qmin; qscale <= qmax; qscale++) { |
86 | 86 |
int i; |
87 |
- if (dsp->fdct == ff_jpeg_fdct_islow_8 || |
|
88 |
- dsp->fdct == ff_jpeg_fdct_islow_10 || |
|
89 |
- dsp->fdct == ff_faandct) { |
|
87 |
+ if (fdsp->fdct == ff_jpeg_fdct_islow_8 || |
|
88 |
+ fdsp->fdct == ff_jpeg_fdct_islow_10 || |
|
89 |
+ fdsp->fdct == ff_faandct) { |
|
90 | 90 |
for (i = 0; i < 64; i++) { |
91 | 91 |
const int j = s->idsp.idct_permutation[i]; |
92 | 92 |
/* 16 <= qscale * quant_matrix[i] <= 7905 |
... | ... |
@@ -98,7 +98,7 @@ void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64], |
98 | 98 |
qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / |
99 | 99 |
(qscale * quant_matrix[j])); |
100 | 100 |
} |
101 |
- } else if (dsp->fdct == ff_fdct_ifast) { |
|
101 |
+ } else if (fdsp->fdct == ff_fdct_ifast) { |
|
102 | 102 |
for (i = 0; i < 64; i++) { |
103 | 103 |
const int j = s->idsp.idct_permutation[i]; |
104 | 104 |
/* 16 <= qscale * quant_matrix[i] <= 7905 |
... | ... |
@@ -136,7 +136,7 @@ void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64], |
136 | 136 |
|
137 | 137 |
for (i = intra; i < 64; i++) { |
138 | 138 |
int64_t max = 8191; |
139 |
- if (dsp->fdct == ff_fdct_ifast) { |
|
139 |
+ if (fdsp->fdct == ff_fdct_ifast) { |
|
140 | 140 |
max = (8191LL * ff_aanscales[i]) >> 14; |
141 | 141 |
} |
142 | 142 |
while (((max * qmat[qscale][i]) >> shift) > INT_MAX) { |
... | ... |
@@ -818,6 +818,7 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx) |
818 | 818 |
if (ff_MPV_common_init(s) < 0) |
819 | 819 |
return -1; |
820 | 820 |
|
821 |
+ ff_fdctdsp_init(&s->fdsp, avctx); |
|
821 | 822 |
ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx); |
822 | 823 |
ff_qpeldsp_init(&s->qdsp); |
823 | 824 |
|
... | ... |
@@ -3714,7 +3715,7 @@ static int dct_quantize_trellis_c(MpegEncContext *s, |
3714 | 3714 |
uint8_t * last_length; |
3715 | 3715 |
const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6); |
3716 | 3716 |
|
3717 |
- s->dsp.fdct (block); |
|
3717 |
+ s->fdsp.fdct(block); |
|
3718 | 3718 |
|
3719 | 3719 |
if(s->dct_error_sum) |
3720 | 3720 |
s->denoise_dct(s, block); |
... | ... |
@@ -3809,7 +3810,7 @@ static int dct_quantize_trellis_c(MpegEncContext *s, |
3809 | 3809 |
int dct_coeff= FFABS(block[ scantable[i] ]); |
3810 | 3810 |
int best_score=256*256*256*120; |
3811 | 3811 |
|
3812 |
- if (s->dsp.fdct == ff_fdct_ifast) |
|
3812 |
+ if (s->fdsp.fdct == ff_fdct_ifast) |
|
3813 | 3813 |
dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12; |
3814 | 3814 |
zero_distortion= dct_coeff*dct_coeff; |
3815 | 3815 |
|
... | ... |
@@ -4141,7 +4142,7 @@ STOP_TIMER("init rem[]") |
4141 | 4141 |
STOP_TIMER("rem*w*w")} |
4142 | 4142 |
{START_TIMER |
4143 | 4143 |
#endif |
4144 |
- s->dsp.fdct(d1); |
|
4144 |
+ s->fdsp.fdct(d1); |
|
4145 | 4145 |
#ifdef REFINE_STATS |
4146 | 4146 |
STOP_TIMER("dct")} |
4147 | 4147 |
#endif |
... | ... |
@@ -4388,7 +4389,7 @@ int ff_dct_quantize_c(MpegEncContext *s, |
4388 | 4388 |
int max=0; |
4389 | 4389 |
unsigned int threshold1, threshold2; |
4390 | 4390 |
|
4391 |
- s->dsp.fdct (block); |
|
4391 |
+ s->fdsp.fdct(block); |
|
4392 | 4392 |
|
4393 | 4393 |
if(s->dct_error_sum) |
4394 | 4394 |
s->denoise_dct(s, block); |
... | ... |
@@ -9,6 +9,7 @@ OBJS-$(CONFIG_H264DSP) += ppc/h264dsp.o ppc/hpeldsp_altivec.o |
9 | 9 |
OBJS-$(CONFIG_H264QPEL) += ppc/h264qpel.o |
10 | 10 |
OBJS-$(CONFIG_HPELDSP) += ppc/hpeldsp_altivec.o |
11 | 11 |
OBJS-$(CONFIG_HUFFYUVDSP) += ppc/huffyuvdsp_altivec.o |
12 |
+OBJS-$(CONFIG_FDCTDSP) += ppc/fdctdsp.o |
|
12 | 13 |
OBJS-$(CONFIG_IDCTDSP) += ppc/idctdsp.o |
13 | 14 |
OBJS-$(CONFIG_MPEGAUDIODSP) += ppc/mpegaudiodsp_altivec.o |
14 | 15 |
OBJS-$(CONFIG_MPEGVIDEO) += ppc/mpegvideo_altivec.o \ |
... | ... |
@@ -25,7 +26,6 @@ OBJS-$(CONFIG_VP7_DECODER) += ppc/vp8dsp_altivec.o |
25 | 25 |
OBJS-$(CONFIG_VP8_DECODER) += ppc/vp8dsp_altivec.o |
26 | 26 |
|
27 | 27 |
ALTIVEC-OBJS-$(CONFIG_DSPUTIL) += ppc/dsputil_altivec.o \ |
28 |
- ppc/fdct_altivec.o \ |
|
29 | 28 |
|
30 | 29 |
FFT-OBJS-$(HAVE_GNU_AS) += ppc/fft_altivec_s.o |
31 | 30 |
FFT-OBJS-$(HAVE_VSX) += ppc/fft_vsx.o |
... | ... |
@@ -35,14 +35,5 @@ av_cold void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx, |
35 | 35 |
int mm_flags = av_get_cpu_flags(); |
36 | 36 |
if (PPC_ALTIVEC(mm_flags)) { |
37 | 37 |
ff_dsputil_init_altivec(c, avctx, high_bit_depth); |
38 |
- |
|
39 |
- if (!high_bit_depth) { |
|
40 |
-#if CONFIG_ENCODERS |
|
41 |
- if (avctx->dct_algo == FF_DCT_AUTO || |
|
42 |
- avctx->dct_algo == FF_DCT_ALTIVEC) { |
|
43 |
- c->fdct = ff_fdct_altivec; |
|
44 |
- } |
|
45 |
-#endif //CONFIG_ENCODERS |
|
46 |
- } |
|
47 | 38 |
} |
48 | 39 |
} |
49 | 40 |
deleted file mode 100644 |
... | ... |
@@ -1,456 +0,0 @@ |
1 |
-/* |
|
2 |
- * Copyright (C) 2003 James Klicman <james@klicman.org> |
|
3 |
- * |
|
4 |
- * This file is part of FFmpeg. |
|
5 |
- * |
|
6 |
- * FFmpeg is free software; you can redistribute it and/or |
|
7 |
- * modify it under the terms of the GNU Lesser General Public |
|
8 |
- * License as published by the Free Software Foundation; either |
|
9 |
- * version 2.1 of the License, or (at your option) any later version. |
|
10 |
- * |
|
11 |
- * FFmpeg is distributed in the hope that it will be useful, |
|
12 |
- * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
13 |
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
14 |
- * Lesser General Public License for more details. |
|
15 |
- * |
|
16 |
- * You should have received a copy of the GNU Lesser General Public |
|
17 |
- * License along with FFmpeg; if not, write to the Free Software |
|
18 |
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
19 |
- */ |
|
20 |
- |
|
21 |
-#include "config.h" |
|
22 |
-#if HAVE_ALTIVEC_H |
|
23 |
-#include <altivec.h> |
|
24 |
-#endif |
|
25 |
- |
|
26 |
-#include "libavutil/common.h" |
|
27 |
-#include "dsputil_altivec.h" |
|
28 |
- |
|
29 |
-#define vs16(v) ((vector signed short) (v)) |
|
30 |
-#define vs32(v) ((vector signed int) (v)) |
|
31 |
-#define vu8(v) ((vector unsigned char) (v)) |
|
32 |
-#define vu16(v) ((vector unsigned short) (v)) |
|
33 |
-#define vu32(v) ((vector unsigned int) (v)) |
|
34 |
- |
|
35 |
-#define C1 0.98078525066375732421875000 /* cos(1 * PI / 16) */ |
|
36 |
-#define C2 0.92387950420379638671875000 /* cos(2 * PI / 16) */ |
|
37 |
-#define C3 0.83146959543228149414062500 /* cos(3 * PI / 16) */ |
|
38 |
-#define C4 0.70710676908493041992187500 /* cos(4 * PI / 16) */ |
|
39 |
-#define C5 0.55557024478912353515625000 /* cos(5 * PI / 16) */ |
|
40 |
-#define C6 0.38268342614173889160156250 /* cos(6 * PI / 16) */ |
|
41 |
-#define C7 0.19509032368659973144531250 /* cos(7 * PI / 16) */ |
|
42 |
-#define SQRT_2 1.41421353816986083984375000 /* sqrt(2) */ |
|
43 |
- |
|
44 |
-#define W0 -(2 * C2) |
|
45 |
-#define W1 (2 * C6) |
|
46 |
-#define W2 (SQRT_2 * C6) |
|
47 |
-#define W3 (SQRT_2 * C3) |
|
48 |
-#define W4 (SQRT_2 * (-C1 + C3 + C5 - C7)) |
|
49 |
-#define W5 (SQRT_2 * (C1 + C3 - C5 + C7)) |
|
50 |
-#define W6 (SQRT_2 * (C1 + C3 + C5 - C7)) |
|
51 |
-#define W7 (SQRT_2 * (C1 + C3 - C5 - C7)) |
|
52 |
-#define W8 (SQRT_2 * (C7 - C3)) |
|
53 |
-#define W9 (SQRT_2 * (-C1 - C3)) |
|
54 |
-#define WA (SQRT_2 * (-C3 - C5)) |
|
55 |
-#define WB (SQRT_2 * (C5 - C3)) |
|
56 |
- |
|
57 |
-static vector float fdctconsts[3] = { |
|
58 |
- { W0, W1, W2, W3 }, |
|
59 |
- { W4, W5, W6, W7 }, |
|
60 |
- { W8, W9, WA, WB } |
|
61 |
-}; |
|
62 |
- |
|
63 |
-#define LD_W0 vec_splat(cnsts0, 0) |
|
64 |
-#define LD_W1 vec_splat(cnsts0, 1) |
|
65 |
-#define LD_W2 vec_splat(cnsts0, 2) |
|
66 |
-#define LD_W3 vec_splat(cnsts0, 3) |
|
67 |
-#define LD_W4 vec_splat(cnsts1, 0) |
|
68 |
-#define LD_W5 vec_splat(cnsts1, 1) |
|
69 |
-#define LD_W6 vec_splat(cnsts1, 2) |
|
70 |
-#define LD_W7 vec_splat(cnsts1, 3) |
|
71 |
-#define LD_W8 vec_splat(cnsts2, 0) |
|
72 |
-#define LD_W9 vec_splat(cnsts2, 1) |
|
73 |
-#define LD_WA vec_splat(cnsts2, 2) |
|
74 |
-#define LD_WB vec_splat(cnsts2, 3) |
|
75 |
- |
|
76 |
-#define FDCTROW(b0, b1, b2, b3, b4, b5, b6, b7) /* {{{ */ \ |
|
77 |
- x0 = vec_add(b0, b7); /* x0 = b0 + b7; */ \ |
|
78 |
- x7 = vec_sub(b0, b7); /* x7 = b0 - b7; */ \ |
|
79 |
- x1 = vec_add(b1, b6); /* x1 = b1 + b6; */ \ |
|
80 |
- x6 = vec_sub(b1, b6); /* x6 = b1 - b6; */ \ |
|
81 |
- x2 = vec_add(b2, b5); /* x2 = b2 + b5; */ \ |
|
82 |
- x5 = vec_sub(b2, b5); /* x5 = b2 - b5; */ \ |
|
83 |
- x3 = vec_add(b3, b4); /* x3 = b3 + b4; */ \ |
|
84 |
- x4 = vec_sub(b3, b4); /* x4 = b3 - b4; */ \ |
|
85 |
- \ |
|
86 |
- b7 = vec_add(x0, x3); /* b7 = x0 + x3; */ \ |
|
87 |
- b1 = vec_add(x1, x2); /* b1 = x1 + x2; */ \ |
|
88 |
- b0 = vec_add(b7, b1); /* b0 = b7 + b1; */ \ |
|
89 |
- b4 = vec_sub(b7, b1); /* b4 = b7 - b1; */ \ |
|
90 |
- \ |
|
91 |
- b2 = vec_sub(x0, x3); /* b2 = x0 - x3; */ \ |
|
92 |
- b6 = vec_sub(x1, x2); /* b6 = x1 - x2; */ \ |
|
93 |
- b5 = vec_add(b6, b2); /* b5 = b6 + b2; */ \ |
|
94 |
- cnst = LD_W2; \ |
|
95 |
- b5 = vec_madd(cnst, b5, mzero); /* b5 = b5 * W2; */ \ |
|
96 |
- cnst = LD_W1; \ |
|
97 |
- b2 = vec_madd(cnst, b2, b5); /* b2 = b5 + b2 * W1; */ \ |
|
98 |
- cnst = LD_W0; \ |
|
99 |
- b6 = vec_madd(cnst, b6, b5); /* b6 = b5 + b6 * W0; */ \ |
|
100 |
- \ |
|
101 |
- x0 = vec_add(x4, x7); /* x0 = x4 + x7; */ \ |
|
102 |
- x1 = vec_add(x5, x6); /* x1 = x5 + x6; */ \ |
|
103 |
- x2 = vec_add(x4, x6); /* x2 = x4 + x6; */ \ |
|
104 |
- x3 = vec_add(x5, x7); /* x3 = x5 + x7; */ \ |
|
105 |
- x8 = vec_add(x2, x3); /* x8 = x2 + x3; */ \ |
|
106 |
- cnst = LD_W3; \ |
|
107 |
- x8 = vec_madd(cnst, x8, mzero); /* x8 = x8 * W3; */ \ |
|
108 |
- \ |
|
109 |
- cnst = LD_W8; \ |
|
110 |
- x0 = vec_madd(cnst, x0, mzero); /* x0 *= W8; */ \ |
|
111 |
- cnst = LD_W9; \ |
|
112 |
- x1 = vec_madd(cnst, x1, mzero); /* x1 *= W9; */ \ |
|
113 |
- cnst = LD_WA; \ |
|
114 |
- x2 = vec_madd(cnst, x2, x8); /* x2 = x2 * WA + x8; */ \ |
|
115 |
- cnst = LD_WB; \ |
|
116 |
- x3 = vec_madd(cnst, x3, x8); /* x3 = x3 * WB + x8; */ \ |
|
117 |
- \ |
|
118 |
- cnst = LD_W4; \ |
|
119 |
- b7 = vec_madd(cnst, x4, x0); /* b7 = x4 * W4 + x0; */ \ |
|
120 |
- cnst = LD_W5; \ |
|
121 |
- b5 = vec_madd(cnst, x5, x1); /* b5 = x5 * W5 + x1; */ \ |
|
122 |
- cnst = LD_W6; \ |
|
123 |
- b3 = vec_madd(cnst, x6, x1); /* b3 = x6 * W6 + x1; */ \ |
|
124 |
- cnst = LD_W7; \ |
|
125 |
- b1 = vec_madd(cnst, x7, x0); /* b1 = x7 * W7 + x0; */ \ |
|
126 |
- \ |
|
127 |
- b7 = vec_add(b7, x2); /* b7 = b7 + x2; */ \ |
|
128 |
- b5 = vec_add(b5, x3); /* b5 = b5 + x3; */ \ |
|
129 |
- b3 = vec_add(b3, x2); /* b3 = b3 + x2; */ \ |
|
130 |
- b1 = vec_add(b1, x3) /* b1 = b1 + x3; */ \ |
|
131 |
- /* }}} */ |
|
132 |
- |
|
133 |
-#define FDCTCOL(b0, b1, b2, b3, b4, b5, b6, b7) /* {{{ */ \ |
|
134 |
- x0 = vec_add(b0, b7); /* x0 = b0 + b7; */ \ |
|
135 |
- x7 = vec_sub(b0, b7); /* x7 = b0 - b7; */ \ |
|
136 |
- x1 = vec_add(b1, b6); /* x1 = b1 + b6; */ \ |
|
137 |
- x6 = vec_sub(b1, b6); /* x6 = b1 - b6; */ \ |
|
138 |
- x2 = vec_add(b2, b5); /* x2 = b2 + b5; */ \ |
|
139 |
- x5 = vec_sub(b2, b5); /* x5 = b2 - b5; */ \ |
|
140 |
- x3 = vec_add(b3, b4); /* x3 = b3 + b4; */ \ |
|
141 |
- x4 = vec_sub(b3, b4); /* x4 = b3 - b4; */ \ |
|
142 |
- \ |
|
143 |
- b7 = vec_add(x0, x3); /* b7 = x0 + x3; */ \ |
|
144 |
- b1 = vec_add(x1, x2); /* b1 = x1 + x2; */ \ |
|
145 |
- b0 = vec_add(b7, b1); /* b0 = b7 + b1; */ \ |
|
146 |
- b4 = vec_sub(b7, b1); /* b4 = b7 - b1; */ \ |
|
147 |
- \ |
|
148 |
- b2 = vec_sub(x0, x3); /* b2 = x0 - x3; */ \ |
|
149 |
- b6 = vec_sub(x1, x2); /* b6 = x1 - x2; */ \ |
|
150 |
- b5 = vec_add(b6, b2); /* b5 = b6 + b2; */ \ |
|
151 |
- cnst = LD_W2; \ |
|
152 |
- b5 = vec_madd(cnst, b5, mzero); /* b5 = b5 * W2; */ \ |
|
153 |
- cnst = LD_W1; \ |
|
154 |
- b2 = vec_madd(cnst, b2, b5); /* b2 = b5 + b2 * W1; */ \ |
|
155 |
- cnst = LD_W0; \ |
|
156 |
- b6 = vec_madd(cnst, b6, b5); /* b6 = b5 + b6 * W0; */ \ |
|
157 |
- \ |
|
158 |
- x0 = vec_add(x4, x7); /* x0 = x4 + x7; */ \ |
|
159 |
- x1 = vec_add(x5, x6); /* x1 = x5 + x6; */ \ |
|
160 |
- x2 = vec_add(x4, x6); /* x2 = x4 + x6; */ \ |
|
161 |
- x3 = vec_add(x5, x7); /* x3 = x5 + x7; */ \ |
|
162 |
- x8 = vec_add(x2, x3); /* x8 = x2 + x3; */ \ |
|
163 |
- cnst = LD_W3; \ |
|
164 |
- x8 = vec_madd(cnst, x8, mzero); /* x8 = x8 * W3; */ \ |
|
165 |
- \ |
|
166 |
- cnst = LD_W8; \ |
|
167 |
- x0 = vec_madd(cnst, x0, mzero); /* x0 *= W8; */ \ |
|
168 |
- cnst = LD_W9; \ |
|
169 |
- x1 = vec_madd(cnst, x1, mzero); /* x1 *= W9; */ \ |
|
170 |
- cnst = LD_WA; \ |
|
171 |
- x2 = vec_madd(cnst, x2, x8); /* x2 = x2 * WA + x8; */ \ |
|
172 |
- cnst = LD_WB; \ |
|
173 |
- x3 = vec_madd(cnst, x3, x8); /* x3 = x3 * WB + x8; */ \ |
|
174 |
- \ |
|
175 |
- cnst = LD_W4; \ |
|
176 |
- b7 = vec_madd(cnst, x4, x0); /* b7 = x4 * W4 + x0; */ \ |
|
177 |
- cnst = LD_W5; \ |
|
178 |
- b5 = vec_madd(cnst, x5, x1); /* b5 = x5 * W5 + x1; */ \ |
|
179 |
- cnst = LD_W6; \ |
|
180 |
- b3 = vec_madd(cnst, x6, x1); /* b3 = x6 * W6 + x1; */ \ |
|
181 |
- cnst = LD_W7; \ |
|
182 |
- b1 = vec_madd(cnst, x7, x0); /* b1 = x7 * W7 + x0; */ \ |
|
183 |
- \ |
|
184 |
- b7 = vec_add(b7, x2); /* b7 += x2; */ \ |
|
185 |
- b5 = vec_add(b5, x3); /* b5 += x3; */ \ |
|
186 |
- b3 = vec_add(b3, x2); /* b3 += x2; */ \ |
|
187 |
- b1 = vec_add(b1, x3) /* b1 += x3; */ \ |
|
188 |
- /* }}} */ |
|
189 |
- |
|
190 |
-/* two dimensional discrete cosine transform */ |
|
191 |
-void ff_fdct_altivec(int16_t *block) |
|
192 |
-{ |
|
193 |
- vector signed short *bp; |
|
194 |
- vector float *cp = fdctconsts; |
|
195 |
- vector float b00, b10, b20, b30, b40, b50, b60, b70; |
|
196 |
- vector float b01, b11, b21, b31, b41, b51, b61, b71; |
|
197 |
- vector float mzero, cnst, cnsts0, cnsts1, cnsts2; |
|
198 |
- vector float x0, x1, x2, x3, x4, x5, x6, x7, x8; |
|
199 |
- |
|
200 |
- /* setup constants {{{ */ |
|
201 |
- /* mzero = -0.0 */ |
|
202 |
- mzero = ((vector float) vec_splat_u32(-1)); |
|
203 |
- mzero = ((vector float) vec_sl(vu32(mzero), vu32(mzero))); |
|
204 |
- cnsts0 = vec_ld(0, cp); |
|
205 |
- cp++; |
|
206 |
- cnsts1 = vec_ld(0, cp); |
|
207 |
- cp++; |
|
208 |
- cnsts2 = vec_ld(0, cp); |
|
209 |
- /* }}} */ |
|
210 |
- |
|
211 |
- /* 8x8 matrix transpose (vector short[8]) {{{ */ |
|
212 |
-#define MERGE_S16(hl, a, b) vec_merge ## hl(vs16(a), vs16(b)) |
|
213 |
- |
|
214 |
- bp = (vector signed short *) block; |
|
215 |
- b00 = ((vector float) vec_ld(0, bp)); |
|
216 |
- b40 = ((vector float) vec_ld(16 * 4, bp)); |
|
217 |
- b01 = ((vector float) MERGE_S16(h, b00, b40)); |
|
218 |
- b11 = ((vector float) MERGE_S16(l, b00, b40)); |
|
219 |
- bp++; |
|
220 |
- b10 = ((vector float) vec_ld(0, bp)); |
|
221 |
- b50 = ((vector float) vec_ld(16 * 4, bp)); |
|
222 |
- b21 = ((vector float) MERGE_S16(h, b10, b50)); |
|
223 |
- b31 = ((vector float) MERGE_S16(l, b10, b50)); |
|
224 |
- bp++; |
|
225 |
- b20 = ((vector float) vec_ld(0, bp)); |
|
226 |
- b60 = ((vector float) vec_ld(16 * 4, bp)); |
|
227 |
- b41 = ((vector float) MERGE_S16(h, b20, b60)); |
|
228 |
- b51 = ((vector float) MERGE_S16(l, b20, b60)); |
|
229 |
- bp++; |
|
230 |
- b30 = ((vector float) vec_ld(0, bp)); |
|
231 |
- b70 = ((vector float) vec_ld(16 * 4, bp)); |
|
232 |
- b61 = ((vector float) MERGE_S16(h, b30, b70)); |
|
233 |
- b71 = ((vector float) MERGE_S16(l, b30, b70)); |
|
234 |
- |
|
235 |
- x0 = ((vector float) MERGE_S16(h, b01, b41)); |
|
236 |
- x1 = ((vector float) MERGE_S16(l, b01, b41)); |
|
237 |
- x2 = ((vector float) MERGE_S16(h, b11, b51)); |
|
238 |
- x3 = ((vector float) MERGE_S16(l, b11, b51)); |
|
239 |
- x4 = ((vector float) MERGE_S16(h, b21, b61)); |
|
240 |
- x5 = ((vector float) MERGE_S16(l, b21, b61)); |
|
241 |
- x6 = ((vector float) MERGE_S16(h, b31, b71)); |
|
242 |
- x7 = ((vector float) MERGE_S16(l, b31, b71)); |
|
243 |
- |
|
244 |
- b00 = ((vector float) MERGE_S16(h, x0, x4)); |
|
245 |
- b10 = ((vector float) MERGE_S16(l, x0, x4)); |
|
246 |
- b20 = ((vector float) MERGE_S16(h, x1, x5)); |
|
247 |
- b30 = ((vector float) MERGE_S16(l, x1, x5)); |
|
248 |
- b40 = ((vector float) MERGE_S16(h, x2, x6)); |
|
249 |
- b50 = ((vector float) MERGE_S16(l, x2, x6)); |
|
250 |
- b60 = ((vector float) MERGE_S16(h, x3, x7)); |
|
251 |
- b70 = ((vector float) MERGE_S16(l, x3, x7)); |
|
252 |
- |
|
253 |
-#undef MERGE_S16 |
|
254 |
- /* }}} */ |
|
255 |
- |
|
256 |
- /* Some of the initial calculations can be done as vector short |
|
257 |
- * before conversion to vector float. The following code section |
|
258 |
- * takes advantage of this. */ |
|
259 |
- |
|
260 |
- /* fdct rows {{{ */ |
|
261 |
- x0 = ((vector float) vec_add(vs16(b00), vs16(b70))); |
|
262 |
- x7 = ((vector float) vec_sub(vs16(b00), vs16(b70))); |
|
263 |
- x1 = ((vector float) vec_add(vs16(b10), vs16(b60))); |
|
264 |
- x6 = ((vector float) vec_sub(vs16(b10), vs16(b60))); |
|
265 |
- x2 = ((vector float) vec_add(vs16(b20), vs16(b50))); |
|
266 |
- x5 = ((vector float) vec_sub(vs16(b20), vs16(b50))); |
|
267 |
- x3 = ((vector float) vec_add(vs16(b30), vs16(b40))); |
|
268 |
- x4 = ((vector float) vec_sub(vs16(b30), vs16(b40))); |
|
269 |
- |
|
270 |
- b70 = ((vector float) vec_add(vs16(x0), vs16(x3))); |
|
271 |
- b10 = ((vector float) vec_add(vs16(x1), vs16(x2))); |
|
272 |
- |
|
273 |
- b00 = ((vector float) vec_add(vs16(b70), vs16(b10))); |
|
274 |
- b40 = ((vector float) vec_sub(vs16(b70), vs16(b10))); |
|
275 |
- |
|
276 |
-#define CTF0(n) \ |
|
277 |
- b ## n ## 1 = ((vector float) vec_unpackl(vs16(b ## n ## 0))); \ |
|
278 |
- b ## n ## 0 = ((vector float) vec_unpackh(vs16(b ## n ## 0))); \ |
|
279 |
- b ## n ## 1 = vec_ctf(vs32(b ## n ## 1), 0); \ |
|
280 |
- b ## n ## 0 = vec_ctf(vs32(b ## n ## 0), 0) |
|
281 |
- |
|
282 |
- CTF0(0); |
|
283 |
- CTF0(4); |
|
284 |
- |
|
285 |
- b20 = ((vector float) vec_sub(vs16(x0), vs16(x3))); |
|
286 |
- b60 = ((vector float) vec_sub(vs16(x1), vs16(x2))); |
|
287 |
- |
|
288 |
- CTF0(2); |
|
289 |
- CTF0(6); |
|
290 |
- |
|
291 |
-#undef CTF0 |
|
292 |
- |
|
293 |
- x0 = vec_add(b60, b20); |
|
294 |
- x1 = vec_add(b61, b21); |
|
295 |
- |
|
296 |
- cnst = LD_W2; |
|
297 |
- x0 = vec_madd(cnst, x0, mzero); |
|
298 |
- x1 = vec_madd(cnst, x1, mzero); |
|
299 |
- cnst = LD_W1; |
|
300 |
- b20 = vec_madd(cnst, b20, x0); |
|
301 |
- b21 = vec_madd(cnst, b21, x1); |
|
302 |
- cnst = LD_W0; |
|
303 |
- b60 = vec_madd(cnst, b60, x0); |
|
304 |
- b61 = vec_madd(cnst, b61, x1); |
|
305 |
- |
|
306 |
-#define CTFX(x, b) \ |
|
307 |
- b ## 0 = ((vector float) vec_unpackh(vs16(x))); \ |
|
308 |
- b ## 1 = ((vector float) vec_unpackl(vs16(x))); \ |
|
309 |
- b ## 0 = vec_ctf(vs32(b ## 0), 0); \ |
|
310 |
- b ## 1 = vec_ctf(vs32(b ## 1), 0) |
|
311 |
- |
|
312 |
- CTFX(x4, b7); |
|
313 |
- CTFX(x5, b5); |
|
314 |
- CTFX(x6, b3); |
|
315 |
- CTFX(x7, b1); |
|
316 |
- |
|
317 |
-#undef CTFX |
|
318 |
- |
|
319 |
- x0 = vec_add(b70, b10); |
|
320 |
- x1 = vec_add(b50, b30); |
|
321 |
- x2 = vec_add(b70, b30); |
|
322 |
- x3 = vec_add(b50, b10); |
|
323 |
- x8 = vec_add(x2, x3); |
|
324 |
- cnst = LD_W3; |
|
325 |
- x8 = vec_madd(cnst, x8, mzero); |
|
326 |
- |
|
327 |
- cnst = LD_W8; |
|
328 |
- x0 = vec_madd(cnst, x0, mzero); |
|
329 |
- cnst = LD_W9; |
|
330 |
- x1 = vec_madd(cnst, x1, mzero); |
|
331 |
- cnst = LD_WA; |
|
332 |
- x2 = vec_madd(cnst, x2, x8); |
|
333 |
- cnst = LD_WB; |
|
334 |
- x3 = vec_madd(cnst, x3, x8); |
|
335 |
- |
|
336 |
- cnst = LD_W4; |
|
337 |
- b70 = vec_madd(cnst, b70, x0); |
|
338 |
- cnst = LD_W5; |
|
339 |
- b50 = vec_madd(cnst, b50, x1); |
|
340 |
- cnst = LD_W6; |
|
341 |
- b30 = vec_madd(cnst, b30, x1); |
|
342 |
- cnst = LD_W7; |
|
343 |
- b10 = vec_madd(cnst, b10, x0); |
|
344 |
- |
|
345 |
- b70 = vec_add(b70, x2); |
|
346 |
- b50 = vec_add(b50, x3); |
|
347 |
- b30 = vec_add(b30, x2); |
|
348 |
- b10 = vec_add(b10, x3); |
|
349 |
- |
|
350 |
- x0 = vec_add(b71, b11); |
|
351 |
- x1 = vec_add(b51, b31); |
|
352 |
- x2 = vec_add(b71, b31); |
|
353 |
- x3 = vec_add(b51, b11); |
|
354 |
- x8 = vec_add(x2, x3); |
|
355 |
- cnst = LD_W3; |
|
356 |
- x8 = vec_madd(cnst, x8, mzero); |
|
357 |
- |
|
358 |
- cnst = LD_W8; |
|
359 |
- x0 = vec_madd(cnst, x0, mzero); |
|
360 |
- cnst = LD_W9; |
|
361 |
- x1 = vec_madd(cnst, x1, mzero); |
|
362 |
- cnst = LD_WA; |
|
363 |
- x2 = vec_madd(cnst, x2, x8); |
|
364 |
- cnst = LD_WB; |
|
365 |
- x3 = vec_madd(cnst, x3, x8); |
|
366 |
- |
|
367 |
- cnst = LD_W4; |
|
368 |
- b71 = vec_madd(cnst, b71, x0); |
|
369 |
- cnst = LD_W5; |
|
370 |
- b51 = vec_madd(cnst, b51, x1); |
|
371 |
- cnst = LD_W6; |
|
372 |
- b31 = vec_madd(cnst, b31, x1); |
|
373 |
- cnst = LD_W7; |
|
374 |
- b11 = vec_madd(cnst, b11, x0); |
|
375 |
- |
|
376 |
- b71 = vec_add(b71, x2); |
|
377 |
- b51 = vec_add(b51, x3); |
|
378 |
- b31 = vec_add(b31, x2); |
|
379 |
- b11 = vec_add(b11, x3); |
|
380 |
- /* }}} */ |
|
381 |
- |
|
382 |
- /* 8x8 matrix transpose (vector float[8][2]) {{{ */ |
|
383 |
- x0 = vec_mergel(b00, b20); |
|
384 |
- x1 = vec_mergeh(b00, b20); |
|
385 |
- x2 = vec_mergel(b10, b30); |
|
386 |
- x3 = vec_mergeh(b10, b30); |
|
387 |
- |
|
388 |
- b00 = vec_mergeh(x1, x3); |
|
389 |
- b10 = vec_mergel(x1, x3); |
|
390 |
- b20 = vec_mergeh(x0, x2); |
|
391 |
- b30 = vec_mergel(x0, x2); |
|
392 |
- |
|
393 |
- x4 = vec_mergel(b41, b61); |
|
394 |
- x5 = vec_mergeh(b41, b61); |
|
395 |
- x6 = vec_mergel(b51, b71); |
|
396 |
- x7 = vec_mergeh(b51, b71); |
|
397 |
- |
|
398 |
- b41 = vec_mergeh(x5, x7); |
|
399 |
- b51 = vec_mergel(x5, x7); |
|
400 |
- b61 = vec_mergeh(x4, x6); |
|
401 |
- b71 = vec_mergel(x4, x6); |
|
402 |
- |
|
403 |
- x0 = vec_mergel(b01, b21); |
|
404 |
- x1 = vec_mergeh(b01, b21); |
|
405 |
- x2 = vec_mergel(b11, b31); |
|
406 |
- x3 = vec_mergeh(b11, b31); |
|
407 |
- |
|
408 |
- x4 = vec_mergel(b40, b60); |
|
409 |
- x5 = vec_mergeh(b40, b60); |
|
410 |
- x6 = vec_mergel(b50, b70); |
|
411 |
- x7 = vec_mergeh(b50, b70); |
|
412 |
- |
|
413 |
- b40 = vec_mergeh(x1, x3); |
|
414 |
- b50 = vec_mergel(x1, x3); |
|
415 |
- b60 = vec_mergeh(x0, x2); |
|
416 |
- b70 = vec_mergel(x0, x2); |
|
417 |
- |
|
418 |
- b01 = vec_mergeh(x5, x7); |
|
419 |
- b11 = vec_mergel(x5, x7); |
|
420 |
- b21 = vec_mergeh(x4, x6); |
|
421 |
- b31 = vec_mergel(x4, x6); |
|
422 |
- /* }}} */ |
|
423 |
- |
|
424 |
- FDCTCOL(b00, b10, b20, b30, b40, b50, b60, b70); |
|
425 |
- FDCTCOL(b01, b11, b21, b31, b41, b51, b61, b71); |
|
426 |
- |
|
427 |
- /* round, convert back to short {{{ */ |
|
428 |
-#define CTS(n) \ |
|
429 |
- b ## n ## 0 = vec_round(b ## n ## 0); \ |
|
430 |
- b ## n ## 1 = vec_round(b ## n ## 1); \ |
|
431 |
- b ## n ## 0 = ((vector float) vec_cts(b ## n ## 0, 0)); \ |
|
432 |
- b ## n ## 1 = ((vector float) vec_cts(b ## n ## 1, 0)); \ |
|
433 |
- b ## n ## 0 = ((vector float) vec_pack(vs32(b ## n ## 0), \ |
|
434 |
- vs32(b ## n ## 1))); \ |
|
435 |
- vec_st(vs16(b ## n ## 0), 0, bp) |
|
436 |
- |
|
437 |
- bp = (vector signed short *) block; |
|
438 |
- CTS(0); |
|
439 |
- bp++; |
|
440 |
- CTS(1); |
|
441 |
- bp++; |
|
442 |
- CTS(2); |
|
443 |
- bp++; |
|
444 |
- CTS(3); |
|
445 |
- bp++; |
|
446 |
- CTS(4); |
|
447 |
- bp++; |
|
448 |
- CTS(5); |
|
449 |
- bp++; |
|
450 |
- CTS(6); |
|
451 |
- bp++; |
|
452 |
- CTS(7); |
|
453 |
- |
|
454 |
-#undef CTS |
|
455 |
- /* }}} */ |
|
456 |
-} |
457 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,479 @@ |
0 |
+/* |
|
1 |
+ * Copyright (C) 2003 James Klicman <james@klicman.org> |
|
2 |
+ * |
|
3 |
+ * This file is part of FFmpeg. |
|
4 |
+ * |
|
5 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
6 |
+ * modify it under the terms of the GNU Lesser General Public |
|
7 |
+ * License as published by the Free Software Foundation; either |
|
8 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
9 |
+ * |
|
10 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
11 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
12 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
13 |
+ * Lesser General Public License for more details. |
|
14 |
+ * |
|
15 |
+ * You should have received a copy of the GNU Lesser General Public |
|
16 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
17 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
18 |
+ */ |
|
19 |
+ |
|
20 |
+#include "config.h" |
|
21 |
+#if HAVE_ALTIVEC_H |
|
22 |
+#include <altivec.h> |
|
23 |
+#endif |
|
24 |
+ |
|
25 |
+#include "libavutil/attributes.h" |
|
26 |
+#include "libavutil/cpu.h" |
|
27 |
+#include "libavutil/ppc/cpu.h" |
|
28 |
+#include "libavcodec/fdctdsp.h" |
|
29 |
+#include "fdctdsp.h" |
|
30 |
+ |
|
31 |
+#if HAVE_ALTIVEC |
|
32 |
+ |
|
33 |
+#define vs16(v) ((vector signed short) (v)) |
|
34 |
+#define vs32(v) ((vector signed int) (v)) |
|
35 |
+#define vu8(v) ((vector unsigned char) (v)) |
|
36 |
+#define vu16(v) ((vector unsigned short) (v)) |
|
37 |
+#define vu32(v) ((vector unsigned int) (v)) |
|
38 |
+ |
|
39 |
+#define C1 0.98078525066375732421875000 /* cos(1 * PI / 16) */ |
|
40 |
+#define C2 0.92387950420379638671875000 /* cos(2 * PI / 16) */ |
|
41 |
+#define C3 0.83146959543228149414062500 /* cos(3 * PI / 16) */ |
|
42 |
+#define C4 0.70710676908493041992187500 /* cos(4 * PI / 16) */ |
|
43 |
+#define C5 0.55557024478912353515625000 /* cos(5 * PI / 16) */ |
|
44 |
+#define C6 0.38268342614173889160156250 /* cos(6 * PI / 16) */ |
|
45 |
+#define C7 0.19509032368659973144531250 /* cos(7 * PI / 16) */ |
|
46 |
+#define SQRT_2 1.41421353816986083984375000 /* sqrt(2) */ |
|
47 |
+ |
|
48 |
+#define W0 -(2 * C2) |
|
49 |
+#define W1 (2 * C6) |
|
50 |
+#define W2 (SQRT_2 * C6) |
|
51 |
+#define W3 (SQRT_2 * C3) |
|
52 |
+#define W4 (SQRT_2 * (-C1 + C3 + C5 - C7)) |
|
53 |
+#define W5 (SQRT_2 * (C1 + C3 - C5 + C7)) |
|
54 |
+#define W6 (SQRT_2 * (C1 + C3 + C5 - C7)) |
|
55 |
+#define W7 (SQRT_2 * (C1 + C3 - C5 - C7)) |
|
56 |
+#define W8 (SQRT_2 * (C7 - C3)) |
|
57 |
+#define W9 (SQRT_2 * (-C1 - C3)) |
|
58 |
+#define WA (SQRT_2 * (-C3 - C5)) |
|
59 |
+#define WB (SQRT_2 * (C5 - C3)) |
|
60 |
+ |
|
61 |
+static vector float fdctconsts[3] = { |
|
62 |
+ { W0, W1, W2, W3 }, |
|
63 |
+ { W4, W5, W6, W7 }, |
|
64 |
+ { W8, W9, WA, WB } |
|
65 |
+}; |
|
66 |
+ |
|
67 |
+#define LD_W0 vec_splat(cnsts0, 0) |
|
68 |
+#define LD_W1 vec_splat(cnsts0, 1) |
|
69 |
+#define LD_W2 vec_splat(cnsts0, 2) |
|
70 |
+#define LD_W3 vec_splat(cnsts0, 3) |
|
71 |
+#define LD_W4 vec_splat(cnsts1, 0) |
|
72 |
+#define LD_W5 vec_splat(cnsts1, 1) |
|
73 |
+#define LD_W6 vec_splat(cnsts1, 2) |
|
74 |
+#define LD_W7 vec_splat(cnsts1, 3) |
|
75 |
+#define LD_W8 vec_splat(cnsts2, 0) |
|
76 |
+#define LD_W9 vec_splat(cnsts2, 1) |
|
77 |
+#define LD_WA vec_splat(cnsts2, 2) |
|
78 |
+#define LD_WB vec_splat(cnsts2, 3) |
|
79 |
+ |
|
80 |
+#define FDCTROW(b0, b1, b2, b3, b4, b5, b6, b7) /* {{{ */ \ |
|
81 |
+ x0 = vec_add(b0, b7); /* x0 = b0 + b7; */ \ |
|
82 |
+ x7 = vec_sub(b0, b7); /* x7 = b0 - b7; */ \ |
|
83 |
+ x1 = vec_add(b1, b6); /* x1 = b1 + b6; */ \ |
|
84 |
+ x6 = vec_sub(b1, b6); /* x6 = b1 - b6; */ \ |
|
85 |
+ x2 = vec_add(b2, b5); /* x2 = b2 + b5; */ \ |
|
86 |
+ x5 = vec_sub(b2, b5); /* x5 = b2 - b5; */ \ |
|
87 |
+ x3 = vec_add(b3, b4); /* x3 = b3 + b4; */ \ |
|
88 |
+ x4 = vec_sub(b3, b4); /* x4 = b3 - b4; */ \ |
|
89 |
+ \ |
|
90 |
+ b7 = vec_add(x0, x3); /* b7 = x0 + x3; */ \ |
|
91 |
+ b1 = vec_add(x1, x2); /* b1 = x1 + x2; */ \ |
|
92 |
+ b0 = vec_add(b7, b1); /* b0 = b7 + b1; */ \ |
|
93 |
+ b4 = vec_sub(b7, b1); /* b4 = b7 - b1; */ \ |
|
94 |
+ \ |
|
95 |
+ b2 = vec_sub(x0, x3); /* b2 = x0 - x3; */ \ |
|
96 |
+ b6 = vec_sub(x1, x2); /* b6 = x1 - x2; */ \ |
|
97 |
+ b5 = vec_add(b6, b2); /* b5 = b6 + b2; */ \ |
|
98 |
+ cnst = LD_W2; \ |
|
99 |
+ b5 = vec_madd(cnst, b5, mzero); /* b5 = b5 * W2; */ \ |
|
100 |
+ cnst = LD_W1; \ |
|
101 |
+ b2 = vec_madd(cnst, b2, b5); /* b2 = b5 + b2 * W1; */ \ |
|
102 |
+ cnst = LD_W0; \ |
|
103 |
+ b6 = vec_madd(cnst, b6, b5); /* b6 = b5 + b6 * W0; */ \ |
|
104 |
+ \ |
|
105 |
+ x0 = vec_add(x4, x7); /* x0 = x4 + x7; */ \ |
|
106 |
+ x1 = vec_add(x5, x6); /* x1 = x5 + x6; */ \ |
|
107 |
+ x2 = vec_add(x4, x6); /* x2 = x4 + x6; */ \ |
|
108 |
+ x3 = vec_add(x5, x7); /* x3 = x5 + x7; */ \ |
|
109 |
+ x8 = vec_add(x2, x3); /* x8 = x2 + x3; */ \ |
|
110 |
+ cnst = LD_W3; \ |
|
111 |
+ x8 = vec_madd(cnst, x8, mzero); /* x8 = x8 * W3; */ \ |
|
112 |
+ \ |
|
113 |
+ cnst = LD_W8; \ |
|
114 |
+ x0 = vec_madd(cnst, x0, mzero); /* x0 *= W8; */ \ |
|
115 |
+ cnst = LD_W9; \ |
|
116 |
+ x1 = vec_madd(cnst, x1, mzero); /* x1 *= W9; */ \ |
|
117 |
+ cnst = LD_WA; \ |
|
118 |
+ x2 = vec_madd(cnst, x2, x8); /* x2 = x2 * WA + x8; */ \ |
|
119 |
+ cnst = LD_WB; \ |
|
120 |
+ x3 = vec_madd(cnst, x3, x8); /* x3 = x3 * WB + x8; */ \ |
|
121 |
+ \ |
|
122 |
+ cnst = LD_W4; \ |
|
123 |
+ b7 = vec_madd(cnst, x4, x0); /* b7 = x4 * W4 + x0; */ \ |
|
124 |
+ cnst = LD_W5; \ |
|
125 |
+ b5 = vec_madd(cnst, x5, x1); /* b5 = x5 * W5 + x1; */ \ |
|
126 |
+ cnst = LD_W6; \ |
|
127 |
+ b3 = vec_madd(cnst, x6, x1); /* b3 = x6 * W6 + x1; */ \ |
|
128 |
+ cnst = LD_W7; \ |
|
129 |
+ b1 = vec_madd(cnst, x7, x0); /* b1 = x7 * W7 + x0; */ \ |
|
130 |
+ \ |
|
131 |
+ b7 = vec_add(b7, x2); /* b7 = b7 + x2; */ \ |
|
132 |
+ b5 = vec_add(b5, x3); /* b5 = b5 + x3; */ \ |
|
133 |
+ b3 = vec_add(b3, x2); /* b3 = b3 + x2; */ \ |
|
134 |
+ b1 = vec_add(b1, x3) /* b1 = b1 + x3; */ \ |
|
135 |
+ /* }}} */ |
|
136 |
+ |
|
137 |
+#define FDCTCOL(b0, b1, b2, b3, b4, b5, b6, b7) /* {{{ */ \ |
|
138 |
+ x0 = vec_add(b0, b7); /* x0 = b0 + b7; */ \ |
|
139 |
+ x7 = vec_sub(b0, b7); /* x7 = b0 - b7; */ \ |
|
140 |
+ x1 = vec_add(b1, b6); /* x1 = b1 + b6; */ \ |
|
141 |
+ x6 = vec_sub(b1, b6); /* x6 = b1 - b6; */ \ |
|
142 |
+ x2 = vec_add(b2, b5); /* x2 = b2 + b5; */ \ |
|
143 |
+ x5 = vec_sub(b2, b5); /* x5 = b2 - b5; */ \ |
|
144 |
+ x3 = vec_add(b3, b4); /* x3 = b3 + b4; */ \ |
|
145 |
+ x4 = vec_sub(b3, b4); /* x4 = b3 - b4; */ \ |
|
146 |
+ \ |
|
147 |
+ b7 = vec_add(x0, x3); /* b7 = x0 + x3; */ \ |
|
148 |
+ b1 = vec_add(x1, x2); /* b1 = x1 + x2; */ \ |
|
149 |
+ b0 = vec_add(b7, b1); /* b0 = b7 + b1; */ \ |
|
150 |
+ b4 = vec_sub(b7, b1); /* b4 = b7 - b1; */ \ |
|
151 |
+ \ |
|
152 |
+ b2 = vec_sub(x0, x3); /* b2 = x0 - x3; */ \ |
|
153 |
+ b6 = vec_sub(x1, x2); /* b6 = x1 - x2; */ \ |
|
154 |
+ b5 = vec_add(b6, b2); /* b5 = b6 + b2; */ \ |
|
155 |
+ cnst = LD_W2; \ |
|
156 |
+ b5 = vec_madd(cnst, b5, mzero); /* b5 = b5 * W2; */ \ |
|
157 |
+ cnst = LD_W1; \ |
|
158 |
+ b2 = vec_madd(cnst, b2, b5); /* b2 = b5 + b2 * W1; */ \ |
|
159 |
+ cnst = LD_W0; \ |
|
160 |
+ b6 = vec_madd(cnst, b6, b5); /* b6 = b5 + b6 * W0; */ \ |
|
161 |
+ \ |
|
162 |
+ x0 = vec_add(x4, x7); /* x0 = x4 + x7; */ \ |
|
163 |
+ x1 = vec_add(x5, x6); /* x1 = x5 + x6; */ \ |
|
164 |
+ x2 = vec_add(x4, x6); /* x2 = x4 + x6; */ \ |
|
165 |
+ x3 = vec_add(x5, x7); /* x3 = x5 + x7; */ \ |
|
166 |
+ x8 = vec_add(x2, x3); /* x8 = x2 + x3; */ \ |
|
167 |
+ cnst = LD_W3; \ |
|
168 |
+ x8 = vec_madd(cnst, x8, mzero); /* x8 = x8 * W3; */ \ |
|
169 |
+ \ |
|
170 |
+ cnst = LD_W8; \ |
|
171 |
+ x0 = vec_madd(cnst, x0, mzero); /* x0 *= W8; */ \ |
|
172 |
+ cnst = LD_W9; \ |
|
173 |
+ x1 = vec_madd(cnst, x1, mzero); /* x1 *= W9; */ \ |
|
174 |
+ cnst = LD_WA; \ |
|
175 |
+ x2 = vec_madd(cnst, x2, x8); /* x2 = x2 * WA + x8; */ \ |
|
176 |
+ cnst = LD_WB; \ |
|
177 |
+ x3 = vec_madd(cnst, x3, x8); /* x3 = x3 * WB + x8; */ \ |
|
178 |
+ \ |
|
179 |
+ cnst = LD_W4; \ |
|
180 |
+ b7 = vec_madd(cnst, x4, x0); /* b7 = x4 * W4 + x0; */ \ |
|
181 |
+ cnst = LD_W5; \ |
|
182 |
+ b5 = vec_madd(cnst, x5, x1); /* b5 = x5 * W5 + x1; */ \ |
|
183 |
+ cnst = LD_W6; \ |
|
184 |
+ b3 = vec_madd(cnst, x6, x1); /* b3 = x6 * W6 + x1; */ \ |
|
185 |
+ cnst = LD_W7; \ |
|
186 |
+ b1 = vec_madd(cnst, x7, x0); /* b1 = x7 * W7 + x0; */ \ |
|
187 |
+ \ |
|
188 |
+ b7 = vec_add(b7, x2); /* b7 += x2; */ \ |
|
189 |
+ b5 = vec_add(b5, x3); /* b5 += x3; */ \ |
|
190 |
+ b3 = vec_add(b3, x2); /* b3 += x2; */ \ |
|
191 |
+ b1 = vec_add(b1, x3) /* b1 += x3; */ \ |
|
192 |
+ /* }}} */ |
|
193 |
+ |
|
194 |
+/* two dimensional discrete cosine transform */ |
|
195 |
+void ff_fdct_altivec(int16_t *block) |
|
196 |
+{ |
|
197 |
+ vector signed short *bp; |
|
198 |
+ vector float *cp = fdctconsts; |
|
199 |
+ vector float b00, b10, b20, b30, b40, b50, b60, b70; |
|
200 |
+ vector float b01, b11, b21, b31, b41, b51, b61, b71; |
|
201 |
+ vector float mzero, cnst, cnsts0, cnsts1, cnsts2; |
|
202 |
+ vector float x0, x1, x2, x3, x4, x5, x6, x7, x8; |
|
203 |
+ |
|
204 |
+ /* setup constants {{{ */ |
|
205 |
+ /* mzero = -0.0 */ |
|
206 |
+ mzero = ((vector float) vec_splat_u32(-1)); |
|
207 |
+ mzero = ((vector float) vec_sl(vu32(mzero), vu32(mzero))); |
|
208 |
+ cnsts0 = vec_ld(0, cp); |
|
209 |
+ cp++; |
|
210 |
+ cnsts1 = vec_ld(0, cp); |
|
211 |
+ cp++; |
|
212 |
+ cnsts2 = vec_ld(0, cp); |
|
213 |
+ /* }}} */ |
|
214 |
+ |
|
215 |
+ /* 8x8 matrix transpose (vector short[8]) {{{ */ |
|
216 |
+#define MERGE_S16(hl, a, b) vec_merge ## hl(vs16(a), vs16(b)) |
|
217 |
+ |
|
218 |
+ bp = (vector signed short *) block; |
|
219 |
+ b00 = ((vector float) vec_ld(0, bp)); |
|
220 |
+ b40 = ((vector float) vec_ld(16 * 4, bp)); |
|
221 |
+ b01 = ((vector float) MERGE_S16(h, b00, b40)); |
|
222 |
+ b11 = ((vector float) MERGE_S16(l, b00, b40)); |
|
223 |
+ bp++; |
|
224 |
+ b10 = ((vector float) vec_ld(0, bp)); |
|
225 |
+ b50 = ((vector float) vec_ld(16 * 4, bp)); |
|
226 |
+ b21 = ((vector float) MERGE_S16(h, b10, b50)); |
|
227 |
+ b31 = ((vector float) MERGE_S16(l, b10, b50)); |
|
228 |
+ bp++; |
|
229 |
+ b20 = ((vector float) vec_ld(0, bp)); |
|
230 |
+ b60 = ((vector float) vec_ld(16 * 4, bp)); |
|
231 |
+ b41 = ((vector float) MERGE_S16(h, b20, b60)); |
|
232 |
+ b51 = ((vector float) MERGE_S16(l, b20, b60)); |
|
233 |
+ bp++; |
|
234 |
+ b30 = ((vector float) vec_ld(0, bp)); |
|
235 |
+ b70 = ((vector float) vec_ld(16 * 4, bp)); |
|
236 |
+ b61 = ((vector float) MERGE_S16(h, b30, b70)); |
|
237 |
+ b71 = ((vector float) MERGE_S16(l, b30, b70)); |
|
238 |
+ |
|
239 |
+ x0 = ((vector float) MERGE_S16(h, b01, b41)); |
|
240 |
+ x1 = ((vector float) MERGE_S16(l, b01, b41)); |
|
241 |
+ x2 = ((vector float) MERGE_S16(h, b11, b51)); |
|
242 |
+ x3 = ((vector float) MERGE_S16(l, b11, b51)); |
|
243 |
+ x4 = ((vector float) MERGE_S16(h, b21, b61)); |
|
244 |
+ x5 = ((vector float) MERGE_S16(l, b21, b61)); |
|
245 |
+ x6 = ((vector float) MERGE_S16(h, b31, b71)); |
|
246 |
+ x7 = ((vector float) MERGE_S16(l, b31, b71)); |
|
247 |
+ |
|
248 |
+ b00 = ((vector float) MERGE_S16(h, x0, x4)); |
|
249 |
+ b10 = ((vector float) MERGE_S16(l, x0, x4)); |
|
250 |
+ b20 = ((vector float) MERGE_S16(h, x1, x5)); |
|
251 |
+ b30 = ((vector float) MERGE_S16(l, x1, x5)); |
|
252 |
+ b40 = ((vector float) MERGE_S16(h, x2, x6)); |
|
253 |
+ b50 = ((vector float) MERGE_S16(l, x2, x6)); |
|
254 |
+ b60 = ((vector float) MERGE_S16(h, x3, x7)); |
|
255 |
+ b70 = ((vector float) MERGE_S16(l, x3, x7)); |
|
256 |
+ |
|
257 |
+#undef MERGE_S16 |
|
258 |
+ /* }}} */ |
|
259 |
+ |
|
260 |
+ /* Some of the initial calculations can be done as vector short |
|
261 |
+ * before conversion to vector float. The following code section |
|
262 |
+ * takes advantage of this. */ |
|
263 |
+ |
|
264 |
+ /* fdct rows {{{ */ |
|
265 |
+ x0 = ((vector float) vec_add(vs16(b00), vs16(b70))); |
|
266 |
+ x7 = ((vector float) vec_sub(vs16(b00), vs16(b70))); |
|
267 |
+ x1 = ((vector float) vec_add(vs16(b10), vs16(b60))); |
|
268 |
+ x6 = ((vector float) vec_sub(vs16(b10), vs16(b60))); |
|
269 |
+ x2 = ((vector float) vec_add(vs16(b20), vs16(b50))); |
|
270 |
+ x5 = ((vector float) vec_sub(vs16(b20), vs16(b50))); |
|
271 |
+ x3 = ((vector float) vec_add(vs16(b30), vs16(b40))); |
|
272 |
+ x4 = ((vector float) vec_sub(vs16(b30), vs16(b40))); |
|
273 |
+ |
|
274 |
+ b70 = ((vector float) vec_add(vs16(x0), vs16(x3))); |
|
275 |
+ b10 = ((vector float) vec_add(vs16(x1), vs16(x2))); |
|
276 |
+ |
|
277 |
+ b00 = ((vector float) vec_add(vs16(b70), vs16(b10))); |
|
278 |
+ b40 = ((vector float) vec_sub(vs16(b70), vs16(b10))); |
|
279 |
+ |
|
280 |
+#define CTF0(n) \ |
|
281 |
+ b ## n ## 1 = ((vector float) vec_unpackl(vs16(b ## n ## 0))); \ |
|
282 |
+ b ## n ## 0 = ((vector float) vec_unpackh(vs16(b ## n ## 0))); \ |
|
283 |
+ b ## n ## 1 = vec_ctf(vs32(b ## n ## 1), 0); \ |
|
284 |
+ b ## n ## 0 = vec_ctf(vs32(b ## n ## 0), 0) |
|
285 |
+ |
|
286 |
+ CTF0(0); |
|
287 |
+ CTF0(4); |
|
288 |
+ |
|
289 |
+ b20 = ((vector float) vec_sub(vs16(x0), vs16(x3))); |
|
290 |
+ b60 = ((vector float) vec_sub(vs16(x1), vs16(x2))); |
|
291 |
+ |
|
292 |
+ CTF0(2); |
|
293 |
+ CTF0(6); |
|
294 |
+ |
|
295 |
+#undef CTF0 |
|
296 |
+ |
|
297 |
+ x0 = vec_add(b60, b20); |
|
298 |
+ x1 = vec_add(b61, b21); |
|
299 |
+ |
|
300 |
+ cnst = LD_W2; |
|
301 |
+ x0 = vec_madd(cnst, x0, mzero); |
|
302 |
+ x1 = vec_madd(cnst, x1, mzero); |
|
303 |
+ cnst = LD_W1; |
|
304 |
+ b20 = vec_madd(cnst, b20, x0); |
|
305 |
+ b21 = vec_madd(cnst, b21, x1); |
|
306 |
+ cnst = LD_W0; |
|
307 |
+ b60 = vec_madd(cnst, b60, x0); |
|
308 |
+ b61 = vec_madd(cnst, b61, x1); |
|
309 |
+ |
|
310 |
+#define CTFX(x, b) \ |
|
311 |
+ b ## 0 = ((vector float) vec_unpackh(vs16(x))); \ |
|
312 |
+ b ## 1 = ((vector float) vec_unpackl(vs16(x))); \ |
|
313 |
+ b ## 0 = vec_ctf(vs32(b ## 0), 0); \ |
|
314 |
+ b ## 1 = vec_ctf(vs32(b ## 1), 0) |
|
315 |
+ |
|
316 |
+ CTFX(x4, b7); |
|
317 |
+ CTFX(x5, b5); |
|
318 |
+ CTFX(x6, b3); |
|
319 |
+ CTFX(x7, b1); |
|
320 |
+ |
|
321 |
+#undef CTFX |
|
322 |
+ |
|
323 |
+ x0 = vec_add(b70, b10); |
|
324 |
+ x1 = vec_add(b50, b30); |
|
325 |
+ x2 = vec_add(b70, b30); |
|
326 |
+ x3 = vec_add(b50, b10); |
|
327 |
+ x8 = vec_add(x2, x3); |
|
328 |
+ cnst = LD_W3; |
|
329 |
+ x8 = vec_madd(cnst, x8, mzero); |
|
330 |
+ |
|
331 |
+ cnst = LD_W8; |
|
332 |
+ x0 = vec_madd(cnst, x0, mzero); |
|
333 |
+ cnst = LD_W9; |
|
334 |
+ x1 = vec_madd(cnst, x1, mzero); |
|
335 |
+ cnst = LD_WA; |
|
336 |
+ x2 = vec_madd(cnst, x2, x8); |
|
337 |
+ cnst = LD_WB; |
|
338 |
+ x3 = vec_madd(cnst, x3, x8); |
|
339 |
+ |
|
340 |
+ cnst = LD_W4; |
|
341 |
+ b70 = vec_madd(cnst, b70, x0); |
|
342 |
+ cnst = LD_W5; |
|
343 |
+ b50 = vec_madd(cnst, b50, x1); |
|
344 |
+ cnst = LD_W6; |
|
345 |
+ b30 = vec_madd(cnst, b30, x1); |
|
346 |
+ cnst = LD_W7; |
|
347 |
+ b10 = vec_madd(cnst, b10, x0); |
|
348 |
+ |
|
349 |
+ b70 = vec_add(b70, x2); |
|
350 |
+ b50 = vec_add(b50, x3); |
|
351 |
+ b30 = vec_add(b30, x2); |
|
352 |
+ b10 = vec_add(b10, x3); |
|
353 |
+ |
|
354 |
+ x0 = vec_add(b71, b11); |
|
355 |
+ x1 = vec_add(b51, b31); |
|
356 |
+ x2 = vec_add(b71, b31); |
|
357 |
+ x3 = vec_add(b51, b11); |
|
358 |
+ x8 = vec_add(x2, x3); |
|
359 |
+ cnst = LD_W3; |
|
360 |
+ x8 = vec_madd(cnst, x8, mzero); |
|
361 |
+ |
|
362 |
+ cnst = LD_W8; |
|
363 |
+ x0 = vec_madd(cnst, x0, mzero); |
|
364 |
+ cnst = LD_W9; |
|
365 |
+ x1 = vec_madd(cnst, x1, mzero); |
|
366 |
+ cnst = LD_WA; |
|
367 |
+ x2 = vec_madd(cnst, x2, x8); |
|
368 |
+ cnst = LD_WB; |
|
369 |
+ x3 = vec_madd(cnst, x3, x8); |
|
370 |
+ |
|
371 |
+ cnst = LD_W4; |
|
372 |
+ b71 = vec_madd(cnst, b71, x0); |
|
373 |
+ cnst = LD_W5; |
|
374 |
+ b51 = vec_madd(cnst, b51, x1); |
|
375 |
+ cnst = LD_W6; |
|
376 |
+ b31 = vec_madd(cnst, b31, x1); |
|
377 |
+ cnst = LD_W7; |
|
378 |
+ b11 = vec_madd(cnst, b11, x0); |
|
379 |
+ |
|
380 |
+ b71 = vec_add(b71, x2); |
|
381 |
+ b51 = vec_add(b51, x3); |
|
382 |
+ b31 = vec_add(b31, x2); |
|
383 |
+ b11 = vec_add(b11, x3); |
|
384 |
+ /* }}} */ |
|
385 |
+ |
|
386 |
+ /* 8x8 matrix transpose (vector float[8][2]) {{{ */ |
|
387 |
+ x0 = vec_mergel(b00, b20); |
|
388 |
+ x1 = vec_mergeh(b00, b20); |
|
389 |
+ x2 = vec_mergel(b10, b30); |
|
390 |
+ x3 = vec_mergeh(b10, b30); |
|
391 |
+ |
|
392 |
+ b00 = vec_mergeh(x1, x3); |
|
393 |
+ b10 = vec_mergel(x1, x3); |
|
394 |
+ b20 = vec_mergeh(x0, x2); |
|
395 |
+ b30 = vec_mergel(x0, x2); |
|
396 |
+ |
|
397 |
+ x4 = vec_mergel(b41, b61); |
|
398 |
+ x5 = vec_mergeh(b41, b61); |
|
399 |
+ x6 = vec_mergel(b51, b71); |
|
400 |
+ x7 = vec_mergeh(b51, b71); |
|
401 |
+ |
|
402 |
+ b41 = vec_mergeh(x5, x7); |
|
403 |
+ b51 = vec_mergel(x5, x7); |
|
404 |
+ b61 = vec_mergeh(x4, x6); |
|
405 |
+ b71 = vec_mergel(x4, x6); |
|
406 |
+ |
|
407 |
+ x0 = vec_mergel(b01, b21); |
|
408 |
+ x1 = vec_mergeh(b01, b21); |
|
409 |
+ x2 = vec_mergel(b11, b31); |
|
410 |
+ x3 = vec_mergeh(b11, b31); |
|
411 |
+ |
|
412 |
+ x4 = vec_mergel(b40, b60); |
|
413 |
+ x5 = vec_mergeh(b40, b60); |
|
414 |
+ x6 = vec_mergel(b50, b70); |
|
415 |
+ x7 = vec_mergeh(b50, b70); |
|
416 |
+ |
|
417 |
+ b40 = vec_mergeh(x1, x3); |
|
418 |
+ b50 = vec_mergel(x1, x3); |
|
419 |
+ b60 = vec_mergeh(x0, x2); |
|
420 |
+ b70 = vec_mergel(x0, x2); |
|
421 |
+ |
|
422 |
+ b01 = vec_mergeh(x5, x7); |
|
423 |
+ b11 = vec_mergel(x5, x7); |
|
424 |
+ b21 = vec_mergeh(x4, x6); |
|
425 |
+ b31 = vec_mergel(x4, x6); |
|
426 |
+ /* }}} */ |
|
427 |
+ |
|
428 |
+ FDCTCOL(b00, b10, b20, b30, b40, b50, b60, b70); |
|
429 |
+ FDCTCOL(b01, b11, b21, b31, b41, b51, b61, b71); |
|
430 |
+ |
|
431 |
+ /* round, convert back to short {{{ */ |
|
432 |
+#define CTS(n) \ |
|
433 |
+ b ## n ## 0 = vec_round(b ## n ## 0); \ |
|
434 |
+ b ## n ## 1 = vec_round(b ## n ## 1); \ |
|
435 |
+ b ## n ## 0 = ((vector float) vec_cts(b ## n ## 0, 0)); \ |
|
436 |
+ b ## n ## 1 = ((vector float) vec_cts(b ## n ## 1, 0)); \ |
|
437 |
+ b ## n ## 0 = ((vector float) vec_pack(vs32(b ## n ## 0), \ |
|
438 |
+ vs32(b ## n ## 1))); \ |
|
439 |
+ vec_st(vs16(b ## n ## 0), 0, bp) |
|
440 |
+ |
|
441 |
+ bp = (vector signed short *) block; |
|
442 |
+ CTS(0); |
|
443 |
+ bp++; |
|
444 |
+ CTS(1); |
|
445 |
+ bp++; |
|
446 |
+ CTS(2); |
|
447 |
+ bp++; |
|
448 |
+ CTS(3); |
|
449 |
+ bp++; |
|
450 |
+ CTS(4); |
|
451 |
+ bp++; |
|
452 |
+ CTS(5); |
|
453 |
+ bp++; |
|
454 |
+ CTS(6); |
|
455 |
+ bp++; |
|
456 |
+ CTS(7); |
|
457 |
+ |
|
458 |
+#undef CTS |
|
459 |
+ /* }}} */ |
|
460 |
+} |
|
461 |
+ |
|
462 |
+#endif /* HAVE_ALTIVEC */ |
|
463 |
+ |
|
464 |
+av_cold void ff_fdctdsp_init_ppc(FDCTDSPContext *c, AVCodecContext *avctx, |
|
465 |
+ unsigned high_bit_depth) |
|
466 |
+{ |
|
467 |
+#if HAVE_ALTIVEC |
|
468 |
+ if (!PPC_ALTIVEC(av_get_cpu_flags())) |
|
469 |
+ return; |
|
470 |
+ |
|
471 |
+ if (!high_bit_depth) { |
|
472 |
+ if (avctx->dct_algo == FF_DCT_AUTO || |
|
473 |
+ avctx->dct_algo == FF_DCT_ALTIVEC) { |
|
474 |
+ c->fdct = ff_fdct_altivec; |
|
475 |
+ } |
|
476 |
+ } |
|
477 |
+#endif /* HAVE_ALTIVEC */ |
|
478 |
+} |
0 | 479 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,26 @@ |
0 |
+/* |
|
1 |
+ * This file is part of FFmpeg. |
|
2 |
+ * |
|
3 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
4 |
+ * modify it under the terms of the GNU Lesser General Public |
|
5 |
+ * License as published by the Free Software Foundation; either |
|
6 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
7 |
+ * |
|
8 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
9 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
10 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
11 |
+ * Lesser General Public License for more details. |
|
12 |
+ * |
|
13 |
+ * You should have received a copy of the GNU Lesser General Public |
|
14 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
15 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
16 |
+ */ |
|
17 |
+ |
|
18 |
+#ifndef AVCODEC_PPC_FDCTDSP_H |
|
19 |
+#define AVCODEC_PPC_FDCTDSP_H |
|
20 |
+ |
|
21 |
+#include <stdint.h> |
|
22 |
+ |
|
23 |
+void ff_fdct_altivec(int16_t *block); |
|
24 |
+ |
|
25 |
+#endif /* AVCODEC_PPC_FDCTDSP_H */ |
... | ... |
@@ -32,6 +32,7 @@ |
32 | 32 |
#include "put_bits.h" |
33 | 33 |
#include "bytestream.h" |
34 | 34 |
#include "dsputil.h" |
35 |
+#include "fdctdsp.h" |
|
35 | 36 |
|
36 | 37 |
#define DEFAULT_SLICE_MB_WIDTH 8 |
37 | 38 |
|
... | ... |
@@ -145,7 +146,7 @@ static const uint8_t QMAT_CHROMA[4][64] = { |
145 | 145 |
|
146 | 146 |
|
147 | 147 |
typedef struct { |
148 |
- DSPContext dsp; |
|
148 |
+ FDCTDSPContext fdsp; |
|
149 | 149 |
uint8_t* fill_y; |
150 | 150 |
uint8_t* fill_u; |
151 | 151 |
uint8_t* fill_v; |
... | ... |
@@ -274,10 +275,10 @@ static void get(uint8_t *pixels, int stride, int16_t* block) |
274 | 274 |
} |
275 | 275 |
} |
276 | 276 |
|
277 |
-static void fdct_get(DSPContext *dsp, uint8_t *pixels, int stride, int16_t* block) |
|
277 |
+static void fdct_get(FDCTDSPContext *fdsp, uint8_t *pixels, int stride, int16_t* block) |
|
278 | 278 |
{ |
279 | 279 |
get(pixels, stride, block); |
280 |
- dsp->fdct(block); |
|
280 |
+ fdsp->fdct(block); |
|
281 | 281 |
} |
282 | 282 |
|
283 | 283 |
static int encode_slice_plane(AVCodecContext *avctx, int mb_count, |
... | ... |
@@ -285,18 +286,18 @@ static int encode_slice_plane(AVCodecContext *avctx, int mb_count, |
285 | 285 |
int *qmat, int chroma) |
286 | 286 |
{ |
287 | 287 |
ProresContext* ctx = avctx->priv_data; |
288 |
- DSPContext *dsp = &ctx->dsp; |
|
288 |
+ FDCTDSPContext *fdsp = &ctx->fdsp; |
|
289 | 289 |
DECLARE_ALIGNED(16, int16_t, blocks)[DEFAULT_SLICE_MB_WIDTH << 8], *block; |
290 | 290 |
int i, blocks_per_slice; |
291 | 291 |
PutBitContext pb; |
292 | 292 |
|
293 | 293 |
block = blocks; |
294 | 294 |
for (i = 0; i < mb_count; i++) { |
295 |
- fdct_get(dsp, src, src_stride, block + (0 << 6)); |
|
296 |
- fdct_get(dsp, src + 8 * src_stride, src_stride, block + ((2 - chroma) << 6)); |
|
295 |
+ fdct_get(fdsp, src, src_stride, block + (0 << 6)); |
|
296 |
+ fdct_get(fdsp, src + 8 * src_stride, src_stride, block + ((2 - chroma) << 6)); |
|
297 | 297 |
if (!chroma) { |
298 |
- fdct_get(dsp, src + 16, src_stride, block + (1 << 6)); |
|
299 |
- fdct_get(dsp, src + 16 + 8 * src_stride, src_stride, block + (3 << 6)); |
|
298 |
+ fdct_get(fdsp, src + 16, src_stride, block + (1 << 6)); |
|
299 |
+ fdct_get(fdsp, src + 16 + 8 * src_stride, src_stride, block + (3 << 6)); |
|
300 | 300 |
} |
301 | 301 |
|
302 | 302 |
block += (256 >> chroma); |
... | ... |
@@ -576,7 +577,7 @@ static av_cold int prores_encode_init(AVCodecContext *avctx) |
576 | 576 |
return -1; |
577 | 577 |
} |
578 | 578 |
|
579 |
- ff_dsputil_init(&ctx->dsp, avctx); |
|
579 |
+ ff_fdctdsp_init(&ctx->fdsp, avctx); |
|
580 | 580 |
|
581 | 581 |
avctx->codec_tag = AV_RL32((const uint8_t*)profiles[avctx->profile].name); |
582 | 582 |
|
... | ... |
@@ -26,8 +26,7 @@ |
26 | 26 |
#include "libavutil/opt.h" |
27 | 27 |
#include "libavutil/pixdesc.h" |
28 | 28 |
#include "avcodec.h" |
29 |
-#include "dct.h" |
|
30 |
-#include "dsputil.h" |
|
29 |
+#include "fdctdsp.h" |
|
31 | 30 |
#include "put_bits.h" |
32 | 31 |
#include "bytestream.h" |
33 | 32 |
#include "internal.h" |
... | ... |
@@ -195,9 +194,9 @@ typedef struct ProresContext { |
195 | 195 |
const uint8_t *quant_mat; |
196 | 196 |
const uint8_t *scantable; |
197 | 197 |
|
198 |
- void (* fdct)(DSPContext *dsp, const uint16_t *src, |
|
199 |
- int linesize, int16_t *block); |
|
200 |
- DSPContext dsp; |
|
198 |
+ void (*fdct)(FDCTDSPContext *fdsp, const uint16_t *src, |
|
199 |
+ int linesize, int16_t *block); |
|
200 |
+ FDCTDSPContext fdsp; |
|
201 | 201 |
|
202 | 202 |
int mb_width, mb_height; |
203 | 203 |
int mbs_per_slice; |
... | ... |
@@ -266,27 +265,27 @@ static void get_slice_data(ProresContext *ctx, const uint16_t *src, |
266 | 266 |
mb_width * sizeof(*emu_buf)); |
267 | 267 |
} |
268 | 268 |
if (!is_chroma) { |
269 |
- ctx->fdct(&ctx->dsp, esrc, elinesize, blocks); |
|
269 |
+ ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks); |
|
270 | 270 |
blocks += 64; |
271 | 271 |
if (blocks_per_mb > 2) { |
272 |
- ctx->fdct(&ctx->dsp, esrc + 8, elinesize, blocks); |
|
272 |
+ ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks); |
|
273 | 273 |
blocks += 64; |
274 | 274 |
} |
275 |
- ctx->fdct(&ctx->dsp, esrc + elinesize * 4, elinesize, blocks); |
|
275 |
+ ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks); |
|
276 | 276 |
blocks += 64; |
277 | 277 |
if (blocks_per_mb > 2) { |
278 |
- ctx->fdct(&ctx->dsp, esrc + elinesize * 4 + 8, elinesize, blocks); |
|
278 |
+ ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks); |
|
279 | 279 |
blocks += 64; |
280 | 280 |
} |
281 | 281 |
} else { |
282 |
- ctx->fdct(&ctx->dsp, esrc, elinesize, blocks); |
|
282 |
+ ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks); |
|
283 | 283 |
blocks += 64; |
284 |
- ctx->fdct(&ctx->dsp, esrc + elinesize * 4, elinesize, blocks); |
|
284 |
+ ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks); |
|
285 | 285 |
blocks += 64; |
286 | 286 |
if (blocks_per_mb > 2) { |
287 |
- ctx->fdct(&ctx->dsp, esrc + 8, elinesize, blocks); |
|
287 |
+ ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks); |
|
288 | 288 |
blocks += 64; |
289 |
- ctx->fdct(&ctx->dsp, esrc + elinesize * 4 + 8, elinesize, blocks); |
|
289 |
+ ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks); |
|
290 | 290 |
blocks += 64; |
291 | 291 |
} |
292 | 292 |
} |
... | ... |
@@ -1066,7 +1065,7 @@ static av_cold int encode_close(AVCodecContext *avctx) |
1066 | 1066 |
return 0; |
1067 | 1067 |
} |
1068 | 1068 |
|
1069 |
-static void prores_fdct(DSPContext *dsp, const uint16_t *src, |
|
1069 |
+static void prores_fdct(FDCTDSPContext *fdsp, const uint16_t *src, |
|
1070 | 1070 |
int linesize, int16_t *block) |
1071 | 1071 |
{ |
1072 | 1072 |
int x, y; |
... | ... |
@@ -1077,7 +1076,7 @@ static void prores_fdct(DSPContext *dsp, const uint16_t *src, |
1077 | 1077 |
block[y * 8 + x] = tsrc[x]; |
1078 | 1078 |
tsrc += linesize >> 1; |
1079 | 1079 |
} |
1080 |
- dsp->fdct(block); |
|
1080 |
+ fdsp->fdct(block); |
|
1081 | 1081 |
} |
1082 | 1082 |
|
1083 | 1083 |
static av_cold int encode_init(AVCodecContext *avctx) |
... | ... |
@@ -1096,7 +1095,7 @@ static av_cold int encode_init(AVCodecContext *avctx) |
1096 | 1096 |
ctx->fdct = prores_fdct; |
1097 | 1097 |
ctx->scantable = interlaced ? ff_prores_interlaced_scan |
1098 | 1098 |
: ff_prores_progressive_scan; |
1099 |
- ff_dsputil_init(&ctx->dsp, avctx); |
|
1099 |
+ ff_fdctdsp_init(&ctx->fdsp, avctx); |
|
1100 | 1100 |
|
1101 | 1101 |
mps = ctx->mbs_per_slice; |
1102 | 1102 |
if (mps & (mps - 1)) { |
... | ... |
@@ -9,6 +9,7 @@ OBJS-$(CONFIG_DCT) += x86/dct_init.o |
9 | 9 |
OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_init.o |
10 | 10 |
OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc_mmx.o \ |
11 | 11 |
x86/motion_est.o |
12 |
+OBJS-$(CONFIG_FDCTDSP) += x86/fdctdsp_init.o |
|
12 | 13 |
OBJS-$(CONFIG_FFT) += x86/fft_init.o |
13 | 14 |
OBJS-$(CONFIG_FLAC_DECODER) += x86/flacdsp_init.o |
14 | 15 |
OBJS-$(CONFIG_FLAC_ENCODER) += x86/flacdsp_init.o |
... | ... |
@@ -59,7 +60,7 @@ OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o |
59 | 59 |
OBJS-$(CONFIG_WEBP_DECODER) += x86/vp8dsp_init.o |
60 | 60 |
|
61 | 61 |
MMX-OBJS-$(CONFIG_DIRAC_DECODER) += x86/dirac_dwt.o |
62 |
-MMX-OBJS-$(CONFIG_ENCODERS) += x86/fdct.o |
|
62 |
+MMX-OBJS-$(CONFIG_FDCTDSP) += x86/fdct.o |
|
63 | 63 |
MMX-OBJS-$(CONFIG_IDCTDSP) += x86/idctdsp_mmx.o \ |
64 | 64 |
x86/idct_mmx_xvid.o \ |
65 | 65 |
x86/idct_sse2_xvid.o \ |
... | ... |
@@ -26,7 +26,6 @@ |
26 | 26 |
#include "libavutil/cpu.h" |
27 | 27 |
#include "libavutil/x86/asm.h" |
28 | 28 |
#include "libavutil/x86/cpu.h" |
29 |
-#include "libavcodec/dct.h" |
|
30 | 29 |
#include "libavcodec/dsputil.h" |
31 | 30 |
#include "libavcodec/mpegvideo.h" |
32 | 31 |
#include "dsputil_x86.h" |
... | ... |
@@ -353,7 +352,6 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx, |
353 | 353 |
unsigned high_bit_depth) |
354 | 354 |
{ |
355 | 355 |
int cpu_flags = av_get_cpu_flags(); |
356 |
- const int dct_algo = avctx->dct_algo; |
|
357 | 356 |
|
358 | 357 |
if (EXTERNAL_MMX(cpu_flags)) { |
359 | 358 |
if (!high_bit_depth) |
... | ... |
@@ -367,10 +365,6 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx, |
367 | 367 |
|
368 | 368 |
#if HAVE_INLINE_ASM |
369 | 369 |
if (INLINE_MMX(cpu_flags)) { |
370 |
- if (!high_bit_depth && |
|
371 |
- (dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX)) |
|
372 |
- c->fdct = ff_fdct_mmx; |
|
373 |
- |
|
374 | 370 |
c->vsad[4] = vsad_intra16_mmx; |
375 | 371 |
|
376 | 372 |
if (!(avctx->flags & CODEC_FLAG_BITEXACT)) { |
... | ... |
@@ -379,10 +373,6 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx, |
379 | 379 |
} |
380 | 380 |
|
381 | 381 |
if (INLINE_MMXEXT(cpu_flags)) { |
382 |
- if (!high_bit_depth && |
|
383 |
- (dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX)) |
|
384 |
- c->fdct = ff_fdct_mmxext; |
|
385 |
- |
|
386 | 382 |
c->vsad[4] = vsad_intra16_mmxext; |
387 | 383 |
|
388 | 384 |
if (!(avctx->flags & CODEC_FLAG_BITEXACT)) { |
... | ... |
@@ -391,9 +381,6 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx, |
391 | 391 |
} |
392 | 392 |
|
393 | 393 |
if (INLINE_SSE2(cpu_flags)) { |
394 |
- if (!high_bit_depth && |
|
395 |
- (dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX)) |
|
396 |
- c->fdct = ff_fdct_sse2; |
|
397 | 394 |
} |
398 | 395 |
|
399 | 396 |
#if HAVE_SSSE3_INLINE |
400 | 397 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,44 @@ |
0 |
+/* |
|
1 |
+ * This file is part of FFmpeg. |
|
2 |
+ * |
|
3 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
4 |
+ * modify it under the terms of the GNU Lesser General Public |
|
5 |
+ * License as published by the Free Software Foundation; either |
|
6 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
7 |
+ * |
|
8 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
9 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
10 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
11 |
+ * Lesser General Public License for more details. |
|
12 |
+ * |
|
13 |
+ * You should have received a copy of the GNU Lesser General Public |
|
14 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
15 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
16 |
+ */ |
|
17 |
+ |
|
18 |
+#include "libavutil/attributes.h" |
|
19 |
+#include "libavutil/cpu.h" |
|
20 |
+#include "libavutil/x86/cpu.h" |
|
21 |
+#include "libavcodec/avcodec.h" |
|
22 |
+#include "libavcodec/dct.h" |
|
23 |
+#include "libavcodec/fdctdsp.h" |
|
24 |
+ |
|
25 |
+av_cold void ff_fdctdsp_init_x86(FDCTDSPContext *c, AVCodecContext *avctx, |
|
26 |
+ unsigned high_bit_depth) |
|
27 |
+{ |
|
28 |
+ int cpu_flags = av_get_cpu_flags(); |
|
29 |
+ const int dct_algo = avctx->dct_algo; |
|
30 |
+ |
|
31 |
+ if (!high_bit_depth) { |
|
32 |
+ if ((dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX)) { |
|
33 |
+ if (INLINE_MMX(cpu_flags)) |
|
34 |
+ c->fdct = ff_fdct_mmx; |
|
35 |
+ |
|
36 |
+ if (INLINE_MMXEXT(cpu_flags)) |
|
37 |
+ c->fdct = ff_fdct_mmxext; |
|
38 |
+ |
|
39 |
+ if (INLINE_SSE2(cpu_flags)) |
|
40 |
+ c->fdct = ff_fdct_sse2; |
|
41 |
+ } |
|
42 |
+ } |
|
43 |
+} |
... | ... |
@@ -233,7 +233,7 @@ static void filter(SPPContext *p, uint8_t *dst, uint8_t *src, |
233 | 233 |
const int y1 = y + offset[i + count - 1][1]; |
234 | 234 |
const int index = x1 + y1*linesize; |
235 | 235 |
p->dsp.get_pixels(block, p->src + index, linesize); |
236 |
- p->dsp.fdct(block); |
|
236 |
+ p->fdsp.fdct(block); |
|
237 | 237 |
p->requantize(block2, block, qp, p->idsp.idct_permutation); |
238 | 238 |
p->idsp.idct(block2); |
239 | 239 |
add_block(p->temp + index, linesize, block2); |
... | ... |
@@ -382,6 +382,7 @@ static av_cold int init(AVFilterContext *ctx) |
382 | 382 |
return AVERROR(ENOMEM); |
383 | 383 |
avpriv_dsputil_init(&spp->dsp, spp->avctx); |
384 | 384 |
ff_idctdsp_init(&spp->idsp, spp->avctx); |
385 |
+ ff_fdctdsp_init(&spp->fdsp, spp->avctx); |
|
385 | 386 |
spp->store_slice = store_slice_c; |
386 | 387 |
switch (spp->mode) { |
387 | 388 |
case MODE_HARD: spp->requantize = hardthresh_c; break; |
... | ... |
@@ -25,6 +25,7 @@ |
25 | 25 |
#include "libavcodec/avcodec.h" |
26 | 26 |
#include "libavcodec/dsputil.h" |
27 | 27 |
#include "libavcodec/idctdsp.h" |
28 |
+#include "libavcodec/fdctdsp.h" |
|
28 | 29 |
#include "avfilter.h" |
29 | 30 |
|
30 | 31 |
#define MAX_LEVEL 6 /* quality levels */ |
... | ... |
@@ -42,6 +43,7 @@ typedef struct { |
42 | 42 |
AVCodecContext *avctx; |
43 | 43 |
DSPContext dsp; |
44 | 44 |
IDCTDSPContext idsp; |
45 |
+ FDCTDSPContext fdsp; |
|
45 | 46 |
int8_t *non_b_qp_table; |
46 | 47 |
int non_b_qp_alloc_size; |
47 | 48 |
int use_bframe_qp; |