... | ... |
@@ -1638,7 +1638,7 @@ wmv3_vdpau_decoder_select="vc1_vdpau_decoder" |
1638 | 1638 |
wmv3_vdpau_hwaccel_select="vc1_vdpau_hwaccel" |
1639 | 1639 |
|
1640 | 1640 |
# parsers |
1641 |
-h264_parser_select="error_resilience golomb h264dsp h264pred h264qpel mpegvideo" |
|
1641 |
+h264_parser_select="error_resilience golomb h264chroma h264dsp h264pred h264qpel mpegvideo" |
|
1642 | 1642 |
mpeg4video_parser_select="error_resilience mpegvideo" |
1643 | 1643 |
mpegvideo_parser_select="error_resilience mpegvideo" |
1644 | 1644 |
vc1_parser_select="error_resilience mpegvideo" |
... | ... |
@@ -44,6 +44,7 @@ FFT-OBJS-$(CONFIG_HARDCODED_TABLES) += cos_tables.o cos_fixed_tables.o |
44 | 44 |
OBJS-$(CONFIG_FFT) += avfft.o fft_fixed.o fft_float.o \ |
45 | 45 |
$(FFT-OBJS-yes) |
46 | 46 |
OBJS-$(CONFIG_GOLOMB) += golomb.o |
47 |
+OBJS-$(CONFIG_H264CHROMA) += h264chroma.o |
|
47 | 48 |
OBJS-$(CONFIG_H264DSP) += h264dsp.o h264idct.o |
48 | 49 |
OBJS-$(CONFIG_H264PRED) += h264pred.o |
49 | 50 |
OBJS-$(CONFIG_H264QPEL) += h264qpel.o |
... | ... |
@@ -26,6 +26,7 @@ ARMV6-OBJS-$(CONFIG_VP8_DECODER) += arm/vp8_armv6.o \ |
26 | 26 |
arm/vp8dsp_init_armv6.o \ |
27 | 27 |
arm/vp8dsp_armv6.o |
28 | 28 |
|
29 |
+OBJS-$(CONFIG_H264CHROMA) += arm/h264chroma_init_arm.o |
|
29 | 30 |
OBJS-$(CONFIG_H264DSP) += arm/h264dsp_init_arm.o |
30 | 31 |
OBJS-$(CONFIG_H264PRED) += arm/h264pred_init_arm.o |
31 | 32 |
OBJS-$(CONFIG_H264QPEL) += arm/h264qpel_init_arm.o |
... | ... |
@@ -67,9 +68,9 @@ NEON-OBJS-$(CONFIG_MDCT) += arm/mdct_neon.o \ |
67 | 67 |
|
68 | 68 |
NEON-OBJS-$(CONFIG_RDFT) += arm/rdft_neon.o \ |
69 | 69 |
|
70 |
+NEON-OBJS-$(CONFIG_H264CHROMA) += arm/h264cmc_neon.o |
|
70 | 71 |
NEON-OBJS-$(CONFIG_H264DSP) += arm/h264dsp_neon.o \ |
71 | 72 |
arm/h264idct_neon.o \ |
72 |
- arm/h264cmc_neon.o \ |
|
73 | 73 |
|
74 | 74 |
NEON-OBJS-$(CONFIG_H264PRED) += arm/h264pred_neon.o \ |
75 | 75 |
|
... | ... |
@@ -87,7 +88,6 @@ NEON-OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_neon.o |
87 | 87 |
NEON-OBJS-$(CONFIG_RV30_DECODER) += arm/rv34dsp_neon.o |
88 | 88 |
NEON-OBJS-$(CONFIG_RV40_DECODER) += arm/rv34dsp_neon.o \ |
89 | 89 |
arm/rv40dsp_neon.o \ |
90 |
- arm/h264cmc_neon.o \ |
|
91 | 90 |
|
92 | 91 |
NEON-OBJS-$(CONFIG_VORBIS_DECODER) += arm/vorbisdsp_neon.o |
93 | 92 |
|
... | ... |
@@ -64,14 +64,6 @@ void ff_add_pixels_clamped_neon(const int16_t *, uint8_t *, int); |
64 | 64 |
void ff_put_pixels_clamped_neon(const int16_t *, uint8_t *, int); |
65 | 65 |
void ff_put_signed_pixels_clamped_neon(const int16_t *, uint8_t *, int); |
66 | 66 |
|
67 |
-void ff_put_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int); |
|
68 |
-void ff_put_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int); |
|
69 |
-void ff_put_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int); |
|
70 |
- |
|
71 |
-void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int); |
|
72 |
-void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int); |
|
73 |
-void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int); |
|
74 |
- |
|
75 | 67 |
void ff_vector_clipf_neon(float *dst, const float *src, float min, float max, |
76 | 68 |
int len); |
77 | 69 |
void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min, |
... | ... |
@@ -139,16 +131,6 @@ av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) |
139 | 139 |
c->put_pixels_clamped = ff_put_pixels_clamped_neon; |
140 | 140 |
c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_neon; |
141 | 141 |
|
142 |
- if (CONFIG_H264_DECODER && !high_bit_depth) { |
|
143 |
- c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_neon; |
|
144 |
- c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_neon; |
|
145 |
- c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_neon; |
|
146 |
- |
|
147 |
- c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_neon; |
|
148 |
- c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_neon; |
|
149 |
- c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_neon; |
|
150 |
- } |
|
151 |
- |
|
152 | 142 |
c->vector_clipf = ff_vector_clipf_neon; |
153 | 143 |
c->vector_clip_int32 = ff_vector_clip_int32_neon; |
154 | 144 |
|
155 | 145 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,51 @@ |
0 |
+/* |
|
1 |
+ * ARM NEON optimised H.264 chroma functions |
|
2 |
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com> |
|
3 |
+ * |
|
4 |
+ * This file is part of Libav. |
|
5 |
+ * |
|
6 |
+ * Libav is free software; you can redistribute it and/or |
|
7 |
+ * modify it under the terms of the GNU Lesser General Public |
|
8 |
+ * License as published by the Free Software Foundation; either |
|
9 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
10 |
+ * |
|
11 |
+ * Libav is distributed in the hope that it will be useful, |
|
12 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
13 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
14 |
+ * Lesser General Public License for more details. |
|
15 |
+ * |
|
16 |
+ * You should have received a copy of the GNU Lesser General Public |
|
17 |
+ * License along with Libav; if not, write to the Free Software |
|
18 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
19 |
+ */ |
|
20 |
+ |
|
21 |
+#include <stdint.h> |
|
22 |
+ |
|
23 |
+#include "libavutil/attributes.h" |
|
24 |
+#include "libavutil/cpu.h" |
|
25 |
+#include "libavutil/arm/cpu.h" |
|
26 |
+#include "libavcodec/h264chroma.h" |
|
27 |
+ |
|
28 |
+void ff_put_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int); |
|
29 |
+void ff_put_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int); |
|
30 |
+void ff_put_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int); |
|
31 |
+ |
|
32 |
+void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int); |
|
33 |
+void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int); |
|
34 |
+void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int); |
|
35 |
+ |
|
36 |
+av_cold void ff_h264chroma_init_arm(H264ChromaContext *c, int bit_depth) |
|
37 |
+{ |
|
38 |
+ const int high_bit_depth = bit_depth > 8; |
|
39 |
+ int cpu_flags = av_get_cpu_flags(); |
|
40 |
+ |
|
41 |
+ if (have_neon(cpu_flags) && !high_bit_depth) { |
|
42 |
+ c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_neon; |
|
43 |
+ c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_neon; |
|
44 |
+ c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_neon; |
|
45 |
+ |
|
46 |
+ c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_neon; |
|
47 |
+ c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_neon; |
|
48 |
+ c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_neon; |
|
49 |
+ } |
|
50 |
+} |
... | ... |
@@ -28,6 +28,7 @@ |
28 | 28 |
#include "avcodec.h" |
29 | 29 |
#include "get_bits.h" |
30 | 30 |
#include "golomb.h" |
31 |
+#include "h264chroma.h" |
|
31 | 32 |
#include "mathops.h" |
32 | 33 |
#include "cavs.h" |
33 | 34 |
|
... | ... |
@@ -464,30 +465,35 @@ void ff_cavs_inter(AVSContext *h, enum cavs_mb mb_type) { |
464 | 464 |
if(ff_cavs_partition_flags[mb_type] == 0){ // 16x16 |
465 | 465 |
mc_part_std(h, 8, 0, h->cy, h->cu, h->cv, 0, 0, |
466 | 466 |
h->cdsp.put_cavs_qpel_pixels_tab[0], |
467 |
- h->dsp.put_h264_chroma_pixels_tab[0], |
|
467 |
+ h->h264chroma.put_h264_chroma_pixels_tab[0], |
|
468 | 468 |
h->cdsp.avg_cavs_qpel_pixels_tab[0], |
469 |
- h->dsp.avg_h264_chroma_pixels_tab[0],&h->mv[MV_FWD_X0]); |
|
469 |
+ h->h264chroma.avg_h264_chroma_pixels_tab[0], |
|
470 |
+ &h->mv[MV_FWD_X0]); |
|
470 | 471 |
}else{ |
471 | 472 |
mc_part_std(h, 4, 0, h->cy, h->cu, h->cv, 0, 0, |
472 | 473 |
h->cdsp.put_cavs_qpel_pixels_tab[1], |
473 |
- h->dsp.put_h264_chroma_pixels_tab[1], |
|
474 |
+ h->h264chroma.put_h264_chroma_pixels_tab[1], |
|
474 | 475 |
h->cdsp.avg_cavs_qpel_pixels_tab[1], |
475 |
- h->dsp.avg_h264_chroma_pixels_tab[1],&h->mv[MV_FWD_X0]); |
|
476 |
+ h->h264chroma.avg_h264_chroma_pixels_tab[1], |
|
477 |
+ &h->mv[MV_FWD_X0]); |
|
476 | 478 |
mc_part_std(h, 4, 0, h->cy, h->cu, h->cv, 4, 0, |
477 | 479 |
h->cdsp.put_cavs_qpel_pixels_tab[1], |
478 |
- h->dsp.put_h264_chroma_pixels_tab[1], |
|
480 |
+ h->h264chroma.put_h264_chroma_pixels_tab[1], |
|
479 | 481 |
h->cdsp.avg_cavs_qpel_pixels_tab[1], |
480 |
- h->dsp.avg_h264_chroma_pixels_tab[1],&h->mv[MV_FWD_X1]); |
|
482 |
+ h->h264chroma.avg_h264_chroma_pixels_tab[1], |
|
483 |
+ &h->mv[MV_FWD_X1]); |
|
481 | 484 |
mc_part_std(h, 4, 0, h->cy, h->cu, h->cv, 0, 4, |
482 | 485 |
h->cdsp.put_cavs_qpel_pixels_tab[1], |
483 |
- h->dsp.put_h264_chroma_pixels_tab[1], |
|
486 |
+ h->h264chroma.put_h264_chroma_pixels_tab[1], |
|
484 | 487 |
h->cdsp.avg_cavs_qpel_pixels_tab[1], |
485 |
- h->dsp.avg_h264_chroma_pixels_tab[1],&h->mv[MV_FWD_X2]); |
|
488 |
+ h->h264chroma.avg_h264_chroma_pixels_tab[1], |
|
489 |
+ &h->mv[MV_FWD_X2]); |
|
486 | 490 |
mc_part_std(h, 4, 0, h->cy, h->cu, h->cv, 4, 4, |
487 | 491 |
h->cdsp.put_cavs_qpel_pixels_tab[1], |
488 |
- h->dsp.put_h264_chroma_pixels_tab[1], |
|
492 |
+ h->h264chroma.put_h264_chroma_pixels_tab[1], |
|
489 | 493 |
h->cdsp.avg_cavs_qpel_pixels_tab[1], |
490 |
- h->dsp.avg_h264_chroma_pixels_tab[1],&h->mv[MV_FWD_X3]); |
|
494 |
+ h->h264chroma.avg_h264_chroma_pixels_tab[1], |
|
495 |
+ &h->mv[MV_FWD_X3]); |
|
491 | 496 |
} |
492 | 497 |
} |
493 | 498 |
|
... | ... |
@@ -720,6 +726,7 @@ av_cold int ff_cavs_init(AVCodecContext *avctx) { |
720 | 720 |
AVSContext *h = avctx->priv_data; |
721 | 721 |
|
722 | 722 |
ff_dsputil_init(&h->dsp, avctx); |
723 |
+ ff_h264chroma_init(&h->h264chroma, 8); |
|
723 | 724 |
ff_videodsp_init(&h->vdsp, 8); |
724 | 725 |
ff_cavsdsp_init(&h->cdsp, avctx); |
725 | 726 |
ff_init_scantable_permutation(h->dsp.idct_permutation, |
... | ... |
@@ -24,6 +24,7 @@ |
24 | 24 |
|
25 | 25 |
#include "cavsdsp.h" |
26 | 26 |
#include "dsputil.h" |
27 |
+#include "h264chroma.h" |
|
27 | 28 |
#include "get_bits.h" |
28 | 29 |
#include "videodsp.h" |
29 | 30 |
|
... | ... |
@@ -161,6 +162,7 @@ typedef struct AVSFrame { |
161 | 161 |
typedef struct AVSContext { |
162 | 162 |
AVCodecContext *avctx; |
163 | 163 |
DSPContext dsp; |
164 |
+ H264ChromaContext h264chroma; |
|
164 | 165 |
VideoDSPContext vdsp; |
165 | 166 |
CAVSDSPContext cdsp; |
166 | 167 |
GetBitContext gb; |
... | ... |
@@ -2719,13 +2719,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx) |
2719 | 2719 |
c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\ |
2720 | 2720 |
c->add_pixels8 = FUNCC(add_pixels8 ## dct , depth);\ |
2721 | 2721 |
c->add_pixels4 = FUNCC(add_pixels4 ## dct , depth);\ |
2722 |
-\ |
|
2723 |
- c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8 , depth);\ |
|
2724 |
- c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4 , depth);\ |
|
2725 |
- c->put_h264_chroma_pixels_tab[2] = FUNCC(put_h264_chroma_mc2 , depth);\ |
|
2726 |
- c->avg_h264_chroma_pixels_tab[0] = FUNCC(avg_h264_chroma_mc8 , depth);\ |
|
2727 |
- c->avg_h264_chroma_pixels_tab[1] = FUNCC(avg_h264_chroma_mc4 , depth);\ |
|
2728 |
- c->avg_h264_chroma_pixels_tab[2] = FUNCC(avg_h264_chroma_mc2 , depth) |
|
2729 | 2722 |
|
2730 | 2723 |
switch (avctx->bits_per_raw_sample) { |
2731 | 2724 |
case 9: |
... | ... |
@@ -140,7 +140,6 @@ void clear_blocks_c(int16_t *blocks); |
140 | 140 |
typedef void (*op_pixels_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, ptrdiff_t line_size, int h); |
141 | 141 |
typedef void (*tpel_mc_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int w, int h); |
142 | 142 |
typedef void (*qpel_mc_func)(uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride); |
143 |
-typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y); |
|
144 | 143 |
|
145 | 144 |
typedef void (*op_fill_func)(uint8_t *block/*align width (8 or 16)*/, uint8_t value, int line_size, int h); |
146 | 145 |
|
... | ... |
@@ -308,12 +307,6 @@ typedef struct DSPContext { |
308 | 308 |
qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16]; |
309 | 309 |
qpel_mc_func put_mspel_pixels_tab[8]; |
310 | 310 |
|
311 |
- /** |
|
312 |
- * h264 Chroma MC |
|
313 |
- */ |
|
314 |
- h264_chroma_mc_func put_h264_chroma_pixels_tab[3]; |
|
315 |
- h264_chroma_mc_func avg_h264_chroma_pixels_tab[3]; |
|
316 |
- |
|
317 | 311 |
me_cmp_func pix_abs[2][4]; |
318 | 312 |
|
319 | 313 |
/* huffyuv specific */ |
... | ... |
@@ -463,124 +463,6 @@ PIXOP2(put, op_put) |
463 | 463 |
#undef op_avg |
464 | 464 |
#undef op_put |
465 | 465 |
|
466 |
-#define H264_CHROMA_MC(OPNAME, OP)\ |
|
467 |
-static void FUNCC(OPNAME ## h264_chroma_mc2)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\ |
|
468 |
- pixel *dst = (pixel*)_dst;\ |
|
469 |
- pixel *src = (pixel*)_src;\ |
|
470 |
- const int A=(8-x)*(8-y);\ |
|
471 |
- const int B=( x)*(8-y);\ |
|
472 |
- const int C=(8-x)*( y);\ |
|
473 |
- const int D=( x)*( y);\ |
|
474 |
- int i;\ |
|
475 |
- stride /= sizeof(pixel);\ |
|
476 |
- \ |
|
477 |
- assert(x<8 && y<8 && x>=0 && y>=0);\ |
|
478 |
-\ |
|
479 |
- if(D){\ |
|
480 |
- for(i=0; i<h; i++){\ |
|
481 |
- OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\ |
|
482 |
- OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\ |
|
483 |
- dst+= stride;\ |
|
484 |
- src+= stride;\ |
|
485 |
- }\ |
|
486 |
- }else{\ |
|
487 |
- const int E= B+C;\ |
|
488 |
- const int step= C ? stride : 1;\ |
|
489 |
- for(i=0; i<h; i++){\ |
|
490 |
- OP(dst[0], (A*src[0] + E*src[step+0]));\ |
|
491 |
- OP(dst[1], (A*src[1] + E*src[step+1]));\ |
|
492 |
- dst+= stride;\ |
|
493 |
- src+= stride;\ |
|
494 |
- }\ |
|
495 |
- }\ |
|
496 |
-}\ |
|
497 |
-\ |
|
498 |
-static void FUNCC(OPNAME ## h264_chroma_mc4)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\ |
|
499 |
- pixel *dst = (pixel*)_dst;\ |
|
500 |
- pixel *src = (pixel*)_src;\ |
|
501 |
- const int A=(8-x)*(8-y);\ |
|
502 |
- const int B=( x)*(8-y);\ |
|
503 |
- const int C=(8-x)*( y);\ |
|
504 |
- const int D=( x)*( y);\ |
|
505 |
- int i;\ |
|
506 |
- stride /= sizeof(pixel);\ |
|
507 |
- \ |
|
508 |
- assert(x<8 && y<8 && x>=0 && y>=0);\ |
|
509 |
-\ |
|
510 |
- if(D){\ |
|
511 |
- for(i=0; i<h; i++){\ |
|
512 |
- OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\ |
|
513 |
- OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\ |
|
514 |
- OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\ |
|
515 |
- OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\ |
|
516 |
- dst+= stride;\ |
|
517 |
- src+= stride;\ |
|
518 |
- }\ |
|
519 |
- }else{\ |
|
520 |
- const int E= B+C;\ |
|
521 |
- const int step= C ? stride : 1;\ |
|
522 |
- for(i=0; i<h; i++){\ |
|
523 |
- OP(dst[0], (A*src[0] + E*src[step+0]));\ |
|
524 |
- OP(dst[1], (A*src[1] + E*src[step+1]));\ |
|
525 |
- OP(dst[2], (A*src[2] + E*src[step+2]));\ |
|
526 |
- OP(dst[3], (A*src[3] + E*src[step+3]));\ |
|
527 |
- dst+= stride;\ |
|
528 |
- src+= stride;\ |
|
529 |
- }\ |
|
530 |
- }\ |
|
531 |
-}\ |
|
532 |
-\ |
|
533 |
-static void FUNCC(OPNAME ## h264_chroma_mc8)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\ |
|
534 |
- pixel *dst = (pixel*)_dst;\ |
|
535 |
- pixel *src = (pixel*)_src;\ |
|
536 |
- const int A=(8-x)*(8-y);\ |
|
537 |
- const int B=( x)*(8-y);\ |
|
538 |
- const int C=(8-x)*( y);\ |
|
539 |
- const int D=( x)*( y);\ |
|
540 |
- int i;\ |
|
541 |
- stride /= sizeof(pixel);\ |
|
542 |
- \ |
|
543 |
- assert(x<8 && y<8 && x>=0 && y>=0);\ |
|
544 |
-\ |
|
545 |
- if(D){\ |
|
546 |
- for(i=0; i<h; i++){\ |
|
547 |
- OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\ |
|
548 |
- OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\ |
|
549 |
- OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\ |
|
550 |
- OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\ |
|
551 |
- OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\ |
|
552 |
- OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\ |
|
553 |
- OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\ |
|
554 |
- OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\ |
|
555 |
- dst+= stride;\ |
|
556 |
- src+= stride;\ |
|
557 |
- }\ |
|
558 |
- }else{\ |
|
559 |
- const int E= B+C;\ |
|
560 |
- const int step= C ? stride : 1;\ |
|
561 |
- for(i=0; i<h; i++){\ |
|
562 |
- OP(dst[0], (A*src[0] + E*src[step+0]));\ |
|
563 |
- OP(dst[1], (A*src[1] + E*src[step+1]));\ |
|
564 |
- OP(dst[2], (A*src[2] + E*src[step+2]));\ |
|
565 |
- OP(dst[3], (A*src[3] + E*src[step+3]));\ |
|
566 |
- OP(dst[4], (A*src[4] + E*src[step+4]));\ |
|
567 |
- OP(dst[5], (A*src[5] + E*src[step+5]));\ |
|
568 |
- OP(dst[6], (A*src[6] + E*src[step+6]));\ |
|
569 |
- OP(dst[7], (A*src[7] + E*src[step+7]));\ |
|
570 |
- dst+= stride;\ |
|
571 |
- src+= stride;\ |
|
572 |
- }\ |
|
573 |
- }\ |
|
574 |
-} |
|
575 |
- |
|
576 |
-#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1) |
|
577 |
-#define op_put(a, b) a = (((b) + 32)>>6) |
|
578 |
- |
|
579 |
-H264_CHROMA_MC(put_ , op_put) |
|
580 |
-H264_CHROMA_MC(avg_ , op_avg) |
|
581 |
-#undef op_avg |
|
582 |
-#undef op_put |
|
583 |
- |
|
584 | 466 |
void FUNCC(ff_put_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) { |
585 | 467 |
FUNCC(put_pixels8)(dst, src, stride, 8); |
586 | 468 |
} |
... | ... |
@@ -34,6 +34,7 @@ |
34 | 34 |
#include "mpegvideo.h" |
35 | 35 |
#include "h264.h" |
36 | 36 |
#include "h264data.h" |
37 |
+#include "h264chroma.h" |
|
37 | 38 |
#include "h264_mvpred.h" |
38 | 39 |
#include "golomb.h" |
39 | 40 |
#include "mathops.h" |
... | ... |
@@ -976,6 +977,7 @@ static av_cold void common_init(H264Context *h) |
976 | 976 |
s->codec_id = s->avctx->codec->id; |
977 | 977 |
|
978 | 978 |
ff_h264dsp_init(&h->h264dsp, 8, 1); |
979 |
+ ff_h264chroma_init(&h->h264chroma, h->sps.bit_depth_chroma); |
|
979 | 980 |
ff_h264qpel_init(&h->h264qpel, 8); |
980 | 981 |
ff_h264_pred_init(&h->hpc, s->codec_id, 8, 1); |
981 | 982 |
|
... | ... |
@@ -2445,6 +2447,7 @@ static int h264_set_parameter_from_sps(H264Context *h) |
2445 | 2445 |
|
2446 | 2446 |
ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma, |
2447 | 2447 |
h->sps.chroma_format_idc); |
2448 |
+ ff_h264chroma_init(&h->h264chroma, h->sps.bit_depth_chroma); |
|
2448 | 2449 |
ff_h264qpel_init(&h->h264qpel, h->sps.bit_depth_luma); |
2449 | 2450 |
ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma, |
2450 | 2451 |
h->sps.chroma_format_idc); |
... | ... |
@@ -31,6 +31,7 @@ |
31 | 31 |
#include "libavutil/intreadwrite.h" |
32 | 32 |
#include "cabac.h" |
33 | 33 |
#include "mpegvideo.h" |
34 |
+#include "h264chroma.h" |
|
34 | 35 |
#include "h264dsp.h" |
35 | 36 |
#include "h264pred.h" |
36 | 37 |
#include "h264qpel.h" |
... | ... |
@@ -254,6 +255,7 @@ typedef struct MMCO { |
254 | 254 |
typedef struct H264Context { |
255 | 255 |
MpegEncContext s; |
256 | 256 |
H264DSPContext h264dsp; |
257 |
+ H264ChromaContext h264chroma; |
|
257 | 258 |
H264QpelContext h264qpel; |
258 | 259 |
int pixel_shift; ///< 0 for 8-bit H264, 1 for high-bit-depth H264 |
259 | 260 |
int chroma_qp[2]; // QPc |
... | ... |
@@ -176,14 +176,14 @@ static av_noinline void FUNC(hl_decode_mb)(H264Context *h) |
176 | 176 |
} else if (is_h264) { |
177 | 177 |
if (chroma422) { |
178 | 178 |
FUNC(hl_motion_422)(h, dest_y, dest_cb, dest_cr, |
179 |
- s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab, |
|
180 |
- s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab, |
|
179 |
+ s->me.qpel_put, h->h264chroma.put_h264_chroma_pixels_tab, |
|
180 |
+ s->me.qpel_avg, h->h264chroma.avg_h264_chroma_pixels_tab, |
|
181 | 181 |
h->h264dsp.weight_h264_pixels_tab, |
182 | 182 |
h->h264dsp.biweight_h264_pixels_tab); |
183 | 183 |
} else { |
184 | 184 |
FUNC(hl_motion_420)(h, dest_y, dest_cb, dest_cr, |
185 |
- s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab, |
|
186 |
- s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab, |
|
185 |
+ s->me.qpel_put, h->h264chroma.put_h264_chroma_pixels_tab, |
|
186 |
+ s->me.qpel_avg, h->h264chroma.avg_h264_chroma_pixels_tab, |
|
187 | 187 |
h->h264dsp.weight_h264_pixels_tab, |
188 | 188 |
h->h264dsp.biweight_h264_pixels_tab); |
189 | 189 |
} |
... | ... |
@@ -360,8 +360,8 @@ static av_noinline void FUNC(hl_decode_mb_444)(H264Context *h) |
360 | 360 |
linesize, 0, 1, SIMPLE, PIXEL_SHIFT); |
361 | 361 |
} else { |
362 | 362 |
FUNC(hl_motion_444)(h, dest[0], dest[1], dest[2], |
363 |
- s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab, |
|
364 |
- s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab, |
|
363 |
+ s->me.qpel_put, h->h264chroma.put_h264_chroma_pixels_tab, |
|
364 |
+ s->me.qpel_avg, h->h264chroma.avg_h264_chroma_pixels_tab, |
|
365 | 365 |
h->h264dsp.weight_h264_pixels_tab, |
366 | 366 |
h->h264dsp.biweight_h264_pixels_tab); |
367 | 367 |
} |
368 | 368 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,64 @@ |
0 |
+/* |
|
1 |
+ * This file is part of Libav. |
|
2 |
+ * |
|
3 |
+ * Libav is free software; you can redistribute it and/or |
|
4 |
+ * modify it under the terms of the GNU Lesser General Public |
|
5 |
+ * License as published by the Free Software Foundation; either |
|
6 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
7 |
+ * |
|
8 |
+ * Libav is distributed in the hope that it will be useful, |
|
9 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
10 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
11 |
+ * Lesser General Public License for more details. |
|
12 |
+ * |
|
13 |
+ * You should have received a copy of the GNU Lesser General Public |
|
14 |
+ * License along with Libav; if not, write to the Free Software |
|
15 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
16 |
+ */ |
|
17 |
+ |
|
18 |
+#include "config.h" |
|
19 |
+#include "h264chroma.h" |
|
20 |
+ |
|
21 |
+#define BIT_DEPTH 8 |
|
22 |
+#include "h264chroma_template.c" |
|
23 |
+#undef BIT_DEPTH |
|
24 |
+ |
|
25 |
+#define BIT_DEPTH 9 |
|
26 |
+#include "h264chroma_template.c" |
|
27 |
+#undef BIT_DEPTH |
|
28 |
+ |
|
29 |
+#define BIT_DEPTH 10 |
|
30 |
+#include "h264chroma_template.c" |
|
31 |
+#undef BIT_DEPTH |
|
32 |
+ |
|
33 |
+#define SET_CHROMA(depth) \ |
|
34 |
+ c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_ ## depth ## _c; \ |
|
35 |
+ c->put_h264_chroma_pixels_tab[1] = put_h264_chroma_mc4_ ## depth ## _c; \ |
|
36 |
+ c->put_h264_chroma_pixels_tab[2] = put_h264_chroma_mc2_ ## depth ## _c; \ |
|
37 |
+ c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_ ## depth ## _c; \ |
|
38 |
+ c->avg_h264_chroma_pixels_tab[1] = avg_h264_chroma_mc4_ ## depth ## _c; \ |
|
39 |
+ c->avg_h264_chroma_pixels_tab[2] = avg_h264_chroma_mc2_ ## depth ## _c; \ |
|
40 |
+ |
|
41 |
+void ff_h264chroma_init(H264ChromaContext *c, int bit_depth) |
|
42 |
+{ |
|
43 |
+ switch (bit_depth) { |
|
44 |
+ case 10: |
|
45 |
+ SET_CHROMA(10); |
|
46 |
+ break; |
|
47 |
+ case 9: |
|
48 |
+ SET_CHROMA(9); |
|
49 |
+ break; |
|
50 |
+ default: |
|
51 |
+ SET_CHROMA(8); |
|
52 |
+ break; |
|
53 |
+ } |
|
54 |
+ |
|
55 |
+ if (ARCH_ARM) |
|
56 |
+ ff_h264chroma_init_arm(c, bit_depth); |
|
57 |
+ if (ARCH_PPC) |
|
58 |
+ ff_h264chroma_init_ppc(c, bit_depth); |
|
59 |
+ if (ARCH_SH4) |
|
60 |
+ ff_h264chroma_init_sh4(c, bit_depth); |
|
61 |
+ if (ARCH_X86) |
|
62 |
+ ff_h264chroma_init_x86(c, bit_depth); |
|
63 |
+} |
0 | 64 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,38 @@ |
0 |
+/* |
|
1 |
+ * This file is part of Libav. |
|
2 |
+ * |
|
3 |
+ * Libav is free software; you can redistribute it and/or |
|
4 |
+ * modify it under the terms of the GNU Lesser General Public |
|
5 |
+ * License as published by the Free Software Foundation; either |
|
6 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
7 |
+ * |
|
8 |
+ * Libav is distributed in the hope that it will be useful, |
|
9 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
10 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
11 |
+ * Lesser General Public License for more details. |
|
12 |
+ * |
|
13 |
+ * You should have received a copy of the GNU Lesser General Public |
|
14 |
+ * License along with Libav; if not, write to the Free Software |
|
15 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
16 |
+ */ |
|
17 |
+ |
|
18 |
+#ifndef AVCODEC_H264CHROMA_H |
|
19 |
+#define AVCODEC_H264CHROMA_H |
|
20 |
+ |
|
21 |
+#include <stdint.h> |
|
22 |
+ |
|
23 |
+typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y); |
|
24 |
+ |
|
25 |
+typedef struct H264ChromaContext { |
|
26 |
+ h264_chroma_mc_func put_h264_chroma_pixels_tab[3]; |
|
27 |
+ h264_chroma_mc_func avg_h264_chroma_pixels_tab[3]; |
|
28 |
+} H264ChromaContext; |
|
29 |
+ |
|
30 |
+void ff_h264chroma_init(H264ChromaContext *c, int bit_depth); |
|
31 |
+ |
|
32 |
+void ff_h264chroma_init_arm(H264ChromaContext *c, int bit_depth); |
|
33 |
+void ff_h264chroma_init_ppc(H264ChromaContext *c, int bit_depth); |
|
34 |
+void ff_h264chroma_init_sh4(H264ChromaContext *c, int bit_depth); |
|
35 |
+void ff_h264chroma_init_x86(H264ChromaContext *c, int bit_depth); |
|
36 |
+ |
|
37 |
+#endif /* AVCODEC_H264CHROMA_H */ |
0 | 38 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,142 @@ |
0 |
+/* |
|
1 |
+ * Copyright (c) 2000, 2001 Fabrice Bellard |
|
2 |
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> |
|
3 |
+ * |
|
4 |
+ * This file is part of Libav. |
|
5 |
+ * |
|
6 |
+ * Libav is free software; you can redistribute it and/or |
|
7 |
+ * modify it under the terms of the GNU Lesser General Public |
|
8 |
+ * License as published by the Free Software Foundation; either |
|
9 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
10 |
+ * |
|
11 |
+ * Libav is distributed in the hope that it will be useful, |
|
12 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
13 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
14 |
+ * Lesser General Public License for more details. |
|
15 |
+ * |
|
16 |
+ * You should have received a copy of the GNU Lesser General Public |
|
17 |
+ * License along with Libav; if not, write to the Free Software |
|
18 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
19 |
+ */ |
|
20 |
+ |
|
21 |
+#include <assert.h> |
|
22 |
+ |
|
23 |
+#include "bit_depth_template.c" |
|
24 |
+ |
|
25 |
+#define H264_CHROMA_MC(OPNAME, OP)\ |
|
26 |
+static void FUNCC(OPNAME ## h264_chroma_mc2)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\ |
|
27 |
+ pixel *dst = (pixel*)_dst;\ |
|
28 |
+ pixel *src = (pixel*)_src;\ |
|
29 |
+ const int A=(8-x)*(8-y);\ |
|
30 |
+ const int B=( x)*(8-y);\ |
|
31 |
+ const int C=(8-x)*( y);\ |
|
32 |
+ const int D=( x)*( y);\ |
|
33 |
+ int i;\ |
|
34 |
+ stride /= sizeof(pixel);\ |
|
35 |
+ \ |
|
36 |
+ assert(x<8 && y<8 && x>=0 && y>=0);\ |
|
37 |
+\ |
|
38 |
+ if(D){\ |
|
39 |
+ for(i=0; i<h; i++){\ |
|
40 |
+ OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\ |
|
41 |
+ OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\ |
|
42 |
+ dst+= stride;\ |
|
43 |
+ src+= stride;\ |
|
44 |
+ }\ |
|
45 |
+ }else{\ |
|
46 |
+ const int E= B+C;\ |
|
47 |
+ const int step= C ? stride : 1;\ |
|
48 |
+ for(i=0; i<h; i++){\ |
|
49 |
+ OP(dst[0], (A*src[0] + E*src[step+0]));\ |
|
50 |
+ OP(dst[1], (A*src[1] + E*src[step+1]));\ |
|
51 |
+ dst+= stride;\ |
|
52 |
+ src+= stride;\ |
|
53 |
+ }\ |
|
54 |
+ }\ |
|
55 |
+}\ |
|
56 |
+\ |
|
57 |
+static void FUNCC(OPNAME ## h264_chroma_mc4)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\ |
|
58 |
+ pixel *dst = (pixel*)_dst;\ |
|
59 |
+ pixel *src = (pixel*)_src;\ |
|
60 |
+ const int A=(8-x)*(8-y);\ |
|
61 |
+ const int B=( x)*(8-y);\ |
|
62 |
+ const int C=(8-x)*( y);\ |
|
63 |
+ const int D=( x)*( y);\ |
|
64 |
+ int i;\ |
|
65 |
+ stride /= sizeof(pixel);\ |
|
66 |
+ \ |
|
67 |
+ assert(x<8 && y<8 && x>=0 && y>=0);\ |
|
68 |
+\ |
|
69 |
+ if(D){\ |
|
70 |
+ for(i=0; i<h; i++){\ |
|
71 |
+ OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\ |
|
72 |
+ OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\ |
|
73 |
+ OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\ |
|
74 |
+ OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\ |
|
75 |
+ dst+= stride;\ |
|
76 |
+ src+= stride;\ |
|
77 |
+ }\ |
|
78 |
+ }else{\ |
|
79 |
+ const int E= B+C;\ |
|
80 |
+ const int step= C ? stride : 1;\ |
|
81 |
+ for(i=0; i<h; i++){\ |
|
82 |
+ OP(dst[0], (A*src[0] + E*src[step+0]));\ |
|
83 |
+ OP(dst[1], (A*src[1] + E*src[step+1]));\ |
|
84 |
+ OP(dst[2], (A*src[2] + E*src[step+2]));\ |
|
85 |
+ OP(dst[3], (A*src[3] + E*src[step+3]));\ |
|
86 |
+ dst+= stride;\ |
|
87 |
+ src+= stride;\ |
|
88 |
+ }\ |
|
89 |
+ }\ |
|
90 |
+}\ |
|
91 |
+\ |
|
92 |
+static void FUNCC(OPNAME ## h264_chroma_mc8)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\ |
|
93 |
+ pixel *dst = (pixel*)_dst;\ |
|
94 |
+ pixel *src = (pixel*)_src;\ |
|
95 |
+ const int A=(8-x)*(8-y);\ |
|
96 |
+ const int B=( x)*(8-y);\ |
|
97 |
+ const int C=(8-x)*( y);\ |
|
98 |
+ const int D=( x)*( y);\ |
|
99 |
+ int i;\ |
|
100 |
+ stride /= sizeof(pixel);\ |
|
101 |
+ \ |
|
102 |
+ assert(x<8 && y<8 && x>=0 && y>=0);\ |
|
103 |
+\ |
|
104 |
+ if(D){\ |
|
105 |
+ for(i=0; i<h; i++){\ |
|
106 |
+ OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\ |
|
107 |
+ OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\ |
|
108 |
+ OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\ |
|
109 |
+ OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\ |
|
110 |
+ OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\ |
|
111 |
+ OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\ |
|
112 |
+ OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\ |
|
113 |
+ OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\ |
|
114 |
+ dst+= stride;\ |
|
115 |
+ src+= stride;\ |
|
116 |
+ }\ |
|
117 |
+ }else{\ |
|
118 |
+ const int E= B+C;\ |
|
119 |
+ const int step= C ? stride : 1;\ |
|
120 |
+ for(i=0; i<h; i++){\ |
|
121 |
+ OP(dst[0], (A*src[0] + E*src[step+0]));\ |
|
122 |
+ OP(dst[1], (A*src[1] + E*src[step+1]));\ |
|
123 |
+ OP(dst[2], (A*src[2] + E*src[step+2]));\ |
|
124 |
+ OP(dst[3], (A*src[3] + E*src[step+3]));\ |
|
125 |
+ OP(dst[4], (A*src[4] + E*src[step+4]));\ |
|
126 |
+ OP(dst[5], (A*src[5] + E*src[step+5]));\ |
|
127 |
+ OP(dst[6], (A*src[6] + E*src[step+6]));\ |
|
128 |
+ OP(dst[7], (A*src[7] + E*src[step+7]));\ |
|
129 |
+ dst+= stride;\ |
|
130 |
+ src+= stride;\ |
|
131 |
+ }\ |
|
132 |
+ }\ |
|
133 |
+} |
|
134 |
+ |
|
135 |
+#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1) |
|
136 |
+#define op_put(a, b) a = (((b) + 32)>>6) |
|
137 |
+ |
|
138 |
+H264_CHROMA_MC(put_ , op_put) |
|
139 |
+H264_CHROMA_MC(avg_ , op_avg) |
|
140 |
+#undef op_avg |
|
141 |
+#undef op_put |
... | ... |
@@ -36,8 +36,6 @@ void ff_gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h, |
36 | 36 |
void ff_idct_put_altivec(uint8_t *dest, int line_size, int16_t *block); |
37 | 37 |
void ff_idct_add_altivec(uint8_t *dest, int line_size, int16_t *block); |
38 | 38 |
|
39 |
-void ff_dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx); |
|
40 |
- |
|
41 | 39 |
void ff_dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx); |
42 | 40 |
void ff_float_init_altivec(DSPContext* c, AVCodecContext *avctx); |
43 | 41 |
void ff_int_init_altivec(DSPContext* c, AVCodecContext *avctx); |
... | ... |
@@ -157,8 +157,6 @@ av_cold void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx) |
157 | 157 |
} |
158 | 158 |
|
159 | 159 |
#if HAVE_ALTIVEC |
160 |
- if(CONFIG_H264_DECODER) ff_dsputil_h264_init_ppc(c, avctx); |
|
161 |
- |
|
162 | 160 |
if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) { |
163 | 161 |
ff_dsputil_init_altivec(c, avctx); |
164 | 162 |
ff_int_init_altivec(c, avctx); |
... | ... |
@@ -33,8 +33,6 @@ |
33 | 33 |
#define AVG_OP_U8_ALTIVEC(d, s, dst) d = vec_avg(dst, s) |
34 | 34 |
|
35 | 35 |
#define OP_U8_ALTIVEC PUT_OP_U8_ALTIVEC |
36 |
-#define PREFIX_h264_chroma_mc8_altivec put_h264_chroma_mc8_altivec |
|
37 |
-#define PREFIX_h264_chroma_mc8_num altivec_put_h264_chroma_mc8_num |
|
38 | 36 |
#define PREFIX_h264_qpel16_h_lowpass_altivec put_h264_qpel16_h_lowpass_altivec |
39 | 37 |
#define PREFIX_h264_qpel16_h_lowpass_num altivec_put_h264_qpel16_h_lowpass_num |
40 | 38 |
#define PREFIX_h264_qpel16_v_lowpass_altivec put_h264_qpel16_v_lowpass_altivec |
... | ... |
@@ -43,8 +41,6 @@ |
43 | 43 |
#define PREFIX_h264_qpel16_hv_lowpass_num altivec_put_h264_qpel16_hv_lowpass_num |
44 | 44 |
#include "h264_qpel_template.c" |
45 | 45 |
#undef OP_U8_ALTIVEC |
46 |
-#undef PREFIX_h264_chroma_mc8_altivec |
|
47 |
-#undef PREFIX_h264_chroma_mc8_num |
|
48 | 46 |
#undef PREFIX_h264_qpel16_h_lowpass_altivec |
49 | 47 |
#undef PREFIX_h264_qpel16_h_lowpass_num |
50 | 48 |
#undef PREFIX_h264_qpel16_v_lowpass_altivec |
... | ... |
@@ -53,8 +49,6 @@ |
53 | 53 |
#undef PREFIX_h264_qpel16_hv_lowpass_num |
54 | 54 |
|
55 | 55 |
#define OP_U8_ALTIVEC AVG_OP_U8_ALTIVEC |
56 |
-#define PREFIX_h264_chroma_mc8_altivec avg_h264_chroma_mc8_altivec |
|
57 |
-#define PREFIX_h264_chroma_mc8_num altivec_avg_h264_chroma_mc8_num |
|
58 | 56 |
#define PREFIX_h264_qpel16_h_lowpass_altivec avg_h264_qpel16_h_lowpass_altivec |
59 | 57 |
#define PREFIX_h264_qpel16_h_lowpass_num altivec_avg_h264_qpel16_h_lowpass_num |
60 | 58 |
#define PREFIX_h264_qpel16_v_lowpass_altivec avg_h264_qpel16_v_lowpass_altivec |
... | ... |
@@ -63,8 +57,6 @@ |
63 | 63 |
#define PREFIX_h264_qpel16_hv_lowpass_num altivec_avg_h264_qpel16_hv_lowpass_num |
64 | 64 |
#include "h264_qpel_template.c" |
65 | 65 |
#undef OP_U8_ALTIVEC |
66 |
-#undef PREFIX_h264_chroma_mc8_altivec |
|
67 |
-#undef PREFIX_h264_chroma_mc8_num |
|
68 | 66 |
#undef PREFIX_h264_qpel16_h_lowpass_altivec |
69 | 67 |
#undef PREFIX_h264_qpel16_h_lowpass_num |
70 | 68 |
#undef PREFIX_h264_qpel16_v_lowpass_altivec |
... | ... |
@@ -273,18 +265,6 @@ static inline void avg_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1, |
273 | 273 |
|
274 | 274 |
H264_MC(put_, 16, altivec) |
275 | 275 |
H264_MC(avg_, 16, altivec) |
276 |
- |
|
277 |
-void ff_dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) |
|
278 |
-{ |
|
279 |
- const int high_bit_depth = avctx->bits_per_raw_sample > 8; |
|
280 |
- |
|
281 |
- if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) { |
|
282 |
- if (!high_bit_depth) { |
|
283 |
- c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_altivec; |
|
284 |
- c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_altivec; |
|
285 |
- } |
|
286 |
- } |
|
287 |
-} |
|
288 | 276 |
#endif /* HAVE_ALTIVEC */ |
289 | 277 |
|
290 | 278 |
av_cold void ff_h264qpel_init_ppc(H264QpelContext *c, int bit_depth) |
... | ... |
@@ -26,274 +26,6 @@ |
26 | 26 |
#define ASSERT_ALIGNED(ptr) ; |
27 | 27 |
#endif |
28 | 28 |
|
29 |
-/* this code assume that stride % 16 == 0 */ |
|
30 |
- |
|
31 |
-#define CHROMA_MC8_ALTIVEC_CORE(BIAS1, BIAS2) \ |
|
32 |
- vsrc2ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc2uc);\ |
|
33 |
- vsrc3ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc3uc);\ |
|
34 |
-\ |
|
35 |
- psum = vec_mladd(vA, vsrc0ssH, BIAS1);\ |
|
36 |
- psum = vec_mladd(vB, vsrc1ssH, psum);\ |
|
37 |
- psum = vec_mladd(vC, vsrc2ssH, psum);\ |
|
38 |
- psum = vec_mladd(vD, vsrc3ssH, psum);\ |
|
39 |
- psum = BIAS2(psum);\ |
|
40 |
- psum = vec_sr(psum, v6us);\ |
|
41 |
-\ |
|
42 |
- vdst = vec_ld(0, dst);\ |
|
43 |
- ppsum = (vec_u8)vec_pack(psum, psum);\ |
|
44 |
- vfdst = vec_perm(vdst, ppsum, fperm);\ |
|
45 |
-\ |
|
46 |
- OP_U8_ALTIVEC(fsum, vfdst, vdst);\ |
|
47 |
-\ |
|
48 |
- vec_st(fsum, 0, dst);\ |
|
49 |
-\ |
|
50 |
- vsrc0ssH = vsrc2ssH;\ |
|
51 |
- vsrc1ssH = vsrc3ssH;\ |
|
52 |
-\ |
|
53 |
- dst += stride;\ |
|
54 |
- src += stride; |
|
55 |
- |
|
56 |
-#define CHROMA_MC8_ALTIVEC_CORE_SIMPLE \ |
|
57 |
-\ |
|
58 |
- vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc0uc);\ |
|
59 |
- vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc1uc);\ |
|
60 |
-\ |
|
61 |
- psum = vec_mladd(vA, vsrc0ssH, v32ss);\ |
|
62 |
- psum = vec_mladd(vE, vsrc1ssH, psum);\ |
|
63 |
- psum = vec_sr(psum, v6us);\ |
|
64 |
-\ |
|
65 |
- vdst = vec_ld(0, dst);\ |
|
66 |
- ppsum = (vec_u8)vec_pack(psum, psum);\ |
|
67 |
- vfdst = vec_perm(vdst, ppsum, fperm);\ |
|
68 |
-\ |
|
69 |
- OP_U8_ALTIVEC(fsum, vfdst, vdst);\ |
|
70 |
-\ |
|
71 |
- vec_st(fsum, 0, dst);\ |
|
72 |
-\ |
|
73 |
- dst += stride;\ |
|
74 |
- src += stride; |
|
75 |
- |
|
76 |
-#define noop(a) a |
|
77 |
-#define add28(a) vec_add(v28ss, a) |
|
78 |
- |
|
79 |
-#ifdef PREFIX_h264_chroma_mc8_altivec |
|
80 |
-static void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, |
|
81 |
- int stride, int h, int x, int y) { |
|
82 |
- DECLARE_ALIGNED(16, signed int, ABCD)[4] = |
|
83 |
- {((8 - x) * (8 - y)), |
|
84 |
- (( x) * (8 - y)), |
|
85 |
- ((8 - x) * ( y)), |
|
86 |
- (( x) * ( y))}; |
|
87 |
- register int i; |
|
88 |
- vec_u8 fperm; |
|
89 |
- const vec_s32 vABCD = vec_ld(0, ABCD); |
|
90 |
- const vec_s16 vA = vec_splat((vec_s16)vABCD, 1); |
|
91 |
- const vec_s16 vB = vec_splat((vec_s16)vABCD, 3); |
|
92 |
- const vec_s16 vC = vec_splat((vec_s16)vABCD, 5); |
|
93 |
- const vec_s16 vD = vec_splat((vec_s16)vABCD, 7); |
|
94 |
- LOAD_ZERO; |
|
95 |
- const vec_s16 v32ss = vec_sl(vec_splat_s16(1),vec_splat_u16(5)); |
|
96 |
- const vec_u16 v6us = vec_splat_u16(6); |
|
97 |
- register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1; |
|
98 |
- register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0; |
|
99 |
- |
|
100 |
- vec_u8 vsrcAuc, av_uninit(vsrcBuc), vsrcperm0, vsrcperm1; |
|
101 |
- vec_u8 vsrc0uc, vsrc1uc; |
|
102 |
- vec_s16 vsrc0ssH, vsrc1ssH; |
|
103 |
- vec_u8 vsrcCuc, vsrc2uc, vsrc3uc; |
|
104 |
- vec_s16 vsrc2ssH, vsrc3ssH, psum; |
|
105 |
- vec_u8 vdst, ppsum, vfdst, fsum; |
|
106 |
- |
|
107 |
- if (((unsigned long)dst) % 16 == 0) { |
|
108 |
- fperm = (vec_u8){0x10, 0x11, 0x12, 0x13, |
|
109 |
- 0x14, 0x15, 0x16, 0x17, |
|
110 |
- 0x08, 0x09, 0x0A, 0x0B, |
|
111 |
- 0x0C, 0x0D, 0x0E, 0x0F}; |
|
112 |
- } else { |
|
113 |
- fperm = (vec_u8){0x00, 0x01, 0x02, 0x03, |
|
114 |
- 0x04, 0x05, 0x06, 0x07, |
|
115 |
- 0x18, 0x19, 0x1A, 0x1B, |
|
116 |
- 0x1C, 0x1D, 0x1E, 0x1F}; |
|
117 |
- } |
|
118 |
- |
|
119 |
- vsrcAuc = vec_ld(0, src); |
|
120 |
- |
|
121 |
- if (loadSecond) |
|
122 |
- vsrcBuc = vec_ld(16, src); |
|
123 |
- vsrcperm0 = vec_lvsl(0, src); |
|
124 |
- vsrcperm1 = vec_lvsl(1, src); |
|
125 |
- |
|
126 |
- vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0); |
|
127 |
- if (reallyBadAlign) |
|
128 |
- vsrc1uc = vsrcBuc; |
|
129 |
- else |
|
130 |
- vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1); |
|
131 |
- |
|
132 |
- vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc0uc); |
|
133 |
- vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc1uc); |
|
134 |
- |
|
135 |
- if (ABCD[3]) { |
|
136 |
- if (!loadSecond) {// -> !reallyBadAlign |
|
137 |
- for (i = 0 ; i < h ; i++) { |
|
138 |
- vsrcCuc = vec_ld(stride + 0, src); |
|
139 |
- vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0); |
|
140 |
- vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1); |
|
141 |
- |
|
142 |
- CHROMA_MC8_ALTIVEC_CORE(v32ss, noop) |
|
143 |
- } |
|
144 |
- } else { |
|
145 |
- vec_u8 vsrcDuc; |
|
146 |
- for (i = 0 ; i < h ; i++) { |
|
147 |
- vsrcCuc = vec_ld(stride + 0, src); |
|
148 |
- vsrcDuc = vec_ld(stride + 16, src); |
|
149 |
- vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0); |
|
150 |
- if (reallyBadAlign) |
|
151 |
- vsrc3uc = vsrcDuc; |
|
152 |
- else |
|
153 |
- vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1); |
|
154 |
- |
|
155 |
- CHROMA_MC8_ALTIVEC_CORE(v32ss, noop) |
|
156 |
- } |
|
157 |
- } |
|
158 |
- } else { |
|
159 |
- const vec_s16 vE = vec_add(vB, vC); |
|
160 |
- if (ABCD[2]) { // x == 0 B == 0 |
|
161 |
- if (!loadSecond) {// -> !reallyBadAlign |
|
162 |
- for (i = 0 ; i < h ; i++) { |
|
163 |
- vsrcCuc = vec_ld(stride + 0, src); |
|
164 |
- vsrc1uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0); |
|
165 |
- CHROMA_MC8_ALTIVEC_CORE_SIMPLE |
|
166 |
- |
|
167 |
- vsrc0uc = vsrc1uc; |
|
168 |
- } |
|
169 |
- } else { |
|
170 |
- vec_u8 vsrcDuc; |
|
171 |
- for (i = 0 ; i < h ; i++) { |
|
172 |
- vsrcCuc = vec_ld(stride + 0, src); |
|
173 |
- vsrcDuc = vec_ld(stride + 15, src); |
|
174 |
- vsrc1uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0); |
|
175 |
- CHROMA_MC8_ALTIVEC_CORE_SIMPLE |
|
176 |
- |
|
177 |
- vsrc0uc = vsrc1uc; |
|
178 |
- } |
|
179 |
- } |
|
180 |
- } else { // y == 0 C == 0 |
|
181 |
- if (!loadSecond) {// -> !reallyBadAlign |
|
182 |
- for (i = 0 ; i < h ; i++) { |
|
183 |
- vsrcCuc = vec_ld(0, src); |
|
184 |
- vsrc0uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0); |
|
185 |
- vsrc1uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1); |
|
186 |
- |
|
187 |
- CHROMA_MC8_ALTIVEC_CORE_SIMPLE |
|
188 |
- } |
|
189 |
- } else { |
|
190 |
- vec_u8 vsrcDuc; |
|
191 |
- for (i = 0 ; i < h ; i++) { |
|
192 |
- vsrcCuc = vec_ld(0, src); |
|
193 |
- vsrcDuc = vec_ld(15, src); |
|
194 |
- vsrc0uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0); |
|
195 |
- if (reallyBadAlign) |
|
196 |
- vsrc1uc = vsrcDuc; |
|
197 |
- else |
|
198 |
- vsrc1uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1); |
|
199 |
- |
|
200 |
- CHROMA_MC8_ALTIVEC_CORE_SIMPLE |
|
201 |
- } |
|
202 |
- } |
|
203 |
- } |
|
204 |
- } |
|
205 |
-} |
|
206 |
-#endif |
|
207 |
- |
|
208 |
-/* this code assume that stride % 16 == 0 */ |
|
209 |
-#ifdef PREFIX_no_rnd_vc1_chroma_mc8_altivec |
|
210 |
-static void PREFIX_no_rnd_vc1_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) { |
|
211 |
- DECLARE_ALIGNED(16, signed int, ABCD)[4] = |
|
212 |
- {((8 - x) * (8 - y)), |
|
213 |
- (( x) * (8 - y)), |
|
214 |
- ((8 - x) * ( y)), |
|
215 |
- (( x) * ( y))}; |
|
216 |
- register int i; |
|
217 |
- vec_u8 fperm; |
|
218 |
- const vec_s32 vABCD = vec_ld(0, ABCD); |
|
219 |
- const vec_s16 vA = vec_splat((vec_s16)vABCD, 1); |
|
220 |
- const vec_s16 vB = vec_splat((vec_s16)vABCD, 3); |
|
221 |
- const vec_s16 vC = vec_splat((vec_s16)vABCD, 5); |
|
222 |
- const vec_s16 vD = vec_splat((vec_s16)vABCD, 7); |
|
223 |
- LOAD_ZERO; |
|
224 |
- const vec_s16 v28ss = vec_sub(vec_sl(vec_splat_s16(1),vec_splat_u16(5)),vec_splat_s16(4)); |
|
225 |
- const vec_u16 v6us = vec_splat_u16(6); |
|
226 |
- register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1; |
|
227 |
- register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0; |
|
228 |
- |
|
229 |
- vec_u8 vsrcAuc, av_uninit(vsrcBuc), vsrcperm0, vsrcperm1; |
|
230 |
- vec_u8 vsrc0uc, vsrc1uc; |
|
231 |
- vec_s16 vsrc0ssH, vsrc1ssH; |
|
232 |
- vec_u8 vsrcCuc, vsrc2uc, vsrc3uc; |
|
233 |
- vec_s16 vsrc2ssH, vsrc3ssH, psum; |
|
234 |
- vec_u8 vdst, ppsum, vfdst, fsum; |
|
235 |
- |
|
236 |
- if (((unsigned long)dst) % 16 == 0) { |
|
237 |
- fperm = (vec_u8){0x10, 0x11, 0x12, 0x13, |
|
238 |
- 0x14, 0x15, 0x16, 0x17, |
|
239 |
- 0x08, 0x09, 0x0A, 0x0B, |
|
240 |
- 0x0C, 0x0D, 0x0E, 0x0F}; |
|
241 |
- } else { |
|
242 |
- fperm = (vec_u8){0x00, 0x01, 0x02, 0x03, |
|
243 |
- 0x04, 0x05, 0x06, 0x07, |
|
244 |
- 0x18, 0x19, 0x1A, 0x1B, |
|
245 |
- 0x1C, 0x1D, 0x1E, 0x1F}; |
|
246 |
- } |
|
247 |
- |
|
248 |
- vsrcAuc = vec_ld(0, src); |
|
249 |
- |
|
250 |
- if (loadSecond) |
|
251 |
- vsrcBuc = vec_ld(16, src); |
|
252 |
- vsrcperm0 = vec_lvsl(0, src); |
|
253 |
- vsrcperm1 = vec_lvsl(1, src); |
|
254 |
- |
|
255 |
- vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0); |
|
256 |
- if (reallyBadAlign) |
|
257 |
- vsrc1uc = vsrcBuc; |
|
258 |
- else |
|
259 |
- vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1); |
|
260 |
- |
|
261 |
- vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v, (vec_u8)vsrc0uc); |
|
262 |
- vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v, (vec_u8)vsrc1uc); |
|
263 |
- |
|
264 |
- if (!loadSecond) {// -> !reallyBadAlign |
|
265 |
- for (i = 0 ; i < h ; i++) { |
|
266 |
- |
|
267 |
- |
|
268 |
- vsrcCuc = vec_ld(stride + 0, src); |
|
269 |
- |
|
270 |
- vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0); |
|
271 |
- vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1); |
|
272 |
- |
|
273 |
- CHROMA_MC8_ALTIVEC_CORE(vec_splat_s16(0), add28) |
|
274 |
- } |
|
275 |
- } else { |
|
276 |
- vec_u8 vsrcDuc; |
|
277 |
- for (i = 0 ; i < h ; i++) { |
|
278 |
- vsrcCuc = vec_ld(stride + 0, src); |
|
279 |
- vsrcDuc = vec_ld(stride + 16, src); |
|
280 |
- |
|
281 |
- vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0); |
|
282 |
- if (reallyBadAlign) |
|
283 |
- vsrc3uc = vsrcDuc; |
|
284 |
- else |
|
285 |
- vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1); |
|
286 |
- |
|
287 |
- CHROMA_MC8_ALTIVEC_CORE(vec_splat_s16(0), add28) |
|
288 |
- } |
|
289 |
- } |
|
290 |
-} |
|
291 |
-#endif |
|
292 |
- |
|
293 |
-#undef noop |
|
294 |
-#undef add28 |
|
295 |
-#undef CHROMA_MC8_ALTIVEC_CORE |
|
296 |
- |
|
297 | 29 |
/* this code assume stride % 16 == 0 */ |
298 | 30 |
#ifdef PREFIX_h264_qpel16_h_lowpass_altivec |
299 | 31 |
static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) { |
300 | 32 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,64 @@ |
0 |
+/* |
|
1 |
+ * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org> |
|
2 |
+ * |
|
3 |
+ * This file is part of Libav. |
|
4 |
+ * |
|
5 |
+ * Libav is free software; you can redistribute it and/or |
|
6 |
+ * modify it under the terms of the GNU Lesser General Public |
|
7 |
+ * License as published by the Free Software Foundation; either |
|
8 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
9 |
+ * |
|
10 |
+ * Libav is distributed in the hope that it will be useful, |
|
11 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
12 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
13 |
+ * Lesser General Public License for more details. |
|
14 |
+ * |
|
15 |
+ * You should have received a copy of the GNU Lesser General Public |
|
16 |
+ * License along with Libav; if not, write to the Free Software |
|
17 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
18 |
+ */ |
|
19 |
+ |
|
20 |
+#include "config.h" |
|
21 |
+#include "libavutil/attributes.h" |
|
22 |
+#include "libavcodec/h264chroma.h" |
|
23 |
+ |
|
24 |
+#if HAVE_ALTIVEC |
|
25 |
+#include "libavutil/cpu.h" |
|
26 |
+#include "libavutil/intreadwrite.h" |
|
27 |
+#include "libavutil/ppc/types_altivec.h" |
|
28 |
+#include "libavutil/ppc/util_altivec.h" |
|
29 |
+#include "dsputil_altivec.h" |
|
30 |
+ |
|
31 |
+#define PUT_OP_U8_ALTIVEC(d, s, dst) d = s |
|
32 |
+#define AVG_OP_U8_ALTIVEC(d, s, dst) d = vec_avg(dst, s) |
|
33 |
+ |
|
34 |
+#define OP_U8_ALTIVEC PUT_OP_U8_ALTIVEC |
|
35 |
+#define PREFIX_h264_chroma_mc8_altivec put_h264_chroma_mc8_altivec |
|
36 |
+#define PREFIX_h264_chroma_mc8_num altivec_put_h264_chroma_mc8_num |
|
37 |
+#include "h264chroma_template.c" |
|
38 |
+#undef OP_U8_ALTIVEC |
|
39 |
+#undef PREFIX_h264_chroma_mc8_altivec |
|
40 |
+#undef PREFIX_h264_chroma_mc8_num |
|
41 |
+ |
|
42 |
+#define OP_U8_ALTIVEC AVG_OP_U8_ALTIVEC |
|
43 |
+#define PREFIX_h264_chroma_mc8_altivec avg_h264_chroma_mc8_altivec |
|
44 |
+#define PREFIX_h264_chroma_mc8_num altivec_avg_h264_chroma_mc8_num |
|
45 |
+#include "h264chroma_template.c" |
|
46 |
+#undef OP_U8_ALTIVEC |
|
47 |
+#undef PREFIX_h264_chroma_mc8_altivec |
|
48 |
+#undef PREFIX_h264_chroma_mc8_num |
|
49 |
+#endif /* HAVE_ALTIVEC */ |
|
50 |
+ |
|
51 |
+av_cold void ff_h264chroma_init_ppc(H264ChromaContext *c, int bit_depth) |
|
52 |
+{ |
|
53 |
+#if HAVE_ALTIVEC |
|
54 |
+ const int high_bit_depth = bit_depth > 8; |
|
55 |
+ |
|
56 |
+ if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) { |
|
57 |
+ if (!high_bit_depth) { |
|
58 |
+ c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_altivec; |
|
59 |
+ c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_altivec; |
|
60 |
+ } |
|
61 |
+ } |
|
62 |
+#endif /* HAVE_ALTIVEC */ |
|
63 |
+} |
0 | 64 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,289 @@ |
0 |
+/* |
|
1 |
+ * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org> |
|
2 |
+ * |
|
3 |
+ * This file is part of Libav. |
|
4 |
+ * |
|
5 |
+ * Libav is free software; you can redistribute it and/or |
|
6 |
+ * modify it under the terms of the GNU Lesser General Public |
|
7 |
+ * License as published by the Free Software Foundation; either |
|
8 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
9 |
+ * |
|
10 |
+ * Libav is distributed in the hope that it will be useful, |
|
11 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
12 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
13 |
+ * Lesser General Public License for more details. |
|
14 |
+ * |
|
15 |
+ * You should have received a copy of the GNU Lesser General Public |
|
16 |
+ * License along with Libav; if not, write to the Free Software |
|
17 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
18 |
+ */ |
|
19 |
+ |
|
20 |
+#include "libavutil/mem.h" |
|
21 |
+ |
|
22 |
+/* this code assume that stride % 16 == 0 */ |
|
23 |
+ |
|
24 |
+#define CHROMA_MC8_ALTIVEC_CORE(BIAS1, BIAS2) \ |
|
25 |
+ vsrc2ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc2uc);\ |
|
26 |
+ vsrc3ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc3uc);\ |
|
27 |
+\ |
|
28 |
+ psum = vec_mladd(vA, vsrc0ssH, BIAS1);\ |
|
29 |
+ psum = vec_mladd(vB, vsrc1ssH, psum);\ |
|
30 |
+ psum = vec_mladd(vC, vsrc2ssH, psum);\ |
|
31 |
+ psum = vec_mladd(vD, vsrc3ssH, psum);\ |
|
32 |
+ psum = BIAS2(psum);\ |
|
33 |
+ psum = vec_sr(psum, v6us);\ |
|
34 |
+\ |
|
35 |
+ vdst = vec_ld(0, dst);\ |
|
36 |
+ ppsum = (vec_u8)vec_pack(psum, psum);\ |
|
37 |
+ vfdst = vec_perm(vdst, ppsum, fperm);\ |
|
38 |
+\ |
|
39 |
+ OP_U8_ALTIVEC(fsum, vfdst, vdst);\ |
|
40 |
+\ |
|
41 |
+ vec_st(fsum, 0, dst);\ |
|
42 |
+\ |
|
43 |
+ vsrc0ssH = vsrc2ssH;\ |
|
44 |
+ vsrc1ssH = vsrc3ssH;\ |
|
45 |
+\ |
|
46 |
+ dst += stride;\ |
|
47 |
+ src += stride; |
|
48 |
+ |
|
49 |
+#define CHROMA_MC8_ALTIVEC_CORE_SIMPLE \ |
|
50 |
+\ |
|
51 |
+ vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc0uc);\ |
|
52 |
+ vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc1uc);\ |
|
53 |
+\ |
|
54 |
+ psum = vec_mladd(vA, vsrc0ssH, v32ss);\ |
|
55 |
+ psum = vec_mladd(vE, vsrc1ssH, psum);\ |
|
56 |
+ psum = vec_sr(psum, v6us);\ |
|
57 |
+\ |
|
58 |
+ vdst = vec_ld(0, dst);\ |
|
59 |
+ ppsum = (vec_u8)vec_pack(psum, psum);\ |
|
60 |
+ vfdst = vec_perm(vdst, ppsum, fperm);\ |
|
61 |
+\ |
|
62 |
+ OP_U8_ALTIVEC(fsum, vfdst, vdst);\ |
|
63 |
+\ |
|
64 |
+ vec_st(fsum, 0, dst);\ |
|
65 |
+\ |
|
66 |
+ dst += stride;\ |
|
67 |
+ src += stride; |
|
68 |
+ |
|
69 |
+#define noop(a) a |
|
70 |
+#define add28(a) vec_add(v28ss, a) |
|
71 |
+ |
|
72 |
+#ifdef PREFIX_h264_chroma_mc8_altivec |
|
73 |
+static void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, |
|
74 |
+ int stride, int h, int x, int y) { |
|
75 |
+ DECLARE_ALIGNED(16, signed int, ABCD)[4] = |
|
76 |
+ {((8 - x) * (8 - y)), |
|
77 |
+ (( x) * (8 - y)), |
|
78 |
+ ((8 - x) * ( y)), |
|
79 |
+ (( x) * ( y))}; |
|
80 |
+ register int i; |
|
81 |
+ vec_u8 fperm; |
|
82 |
+ const vec_s32 vABCD = vec_ld(0, ABCD); |
|
83 |
+ const vec_s16 vA = vec_splat((vec_s16)vABCD, 1); |
|
84 |
+ const vec_s16 vB = vec_splat((vec_s16)vABCD, 3); |
|
85 |
+ const vec_s16 vC = vec_splat((vec_s16)vABCD, 5); |
|
86 |
+ const vec_s16 vD = vec_splat((vec_s16)vABCD, 7); |
|
87 |
+ LOAD_ZERO; |
|
88 |
+ const vec_s16 v32ss = vec_sl(vec_splat_s16(1),vec_splat_u16(5)); |
|
89 |
+ const vec_u16 v6us = vec_splat_u16(6); |
|
90 |
+ register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1; |
|
91 |
+ register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0; |
|
92 |
+ |
|
93 |
+ vec_u8 vsrcAuc, av_uninit(vsrcBuc), vsrcperm0, vsrcperm1; |
|
94 |
+ vec_u8 vsrc0uc, vsrc1uc; |
|
95 |
+ vec_s16 vsrc0ssH, vsrc1ssH; |
|
96 |
+ vec_u8 vsrcCuc, vsrc2uc, vsrc3uc; |
|
97 |
+ vec_s16 vsrc2ssH, vsrc3ssH, psum; |
|
98 |
+ vec_u8 vdst, ppsum, vfdst, fsum; |
|
99 |
+ |
|
100 |
+ if (((unsigned long)dst) % 16 == 0) { |
|
101 |
+ fperm = (vec_u8){0x10, 0x11, 0x12, 0x13, |
|
102 |
+ 0x14, 0x15, 0x16, 0x17, |
|
103 |
+ 0x08, 0x09, 0x0A, 0x0B, |
|
104 |
+ 0x0C, 0x0D, 0x0E, 0x0F}; |
|
105 |
+ } else { |
|
106 |
+ fperm = (vec_u8){0x00, 0x01, 0x02, 0x03, |
|
107 |
+ 0x04, 0x05, 0x06, 0x07, |
|
108 |
+ 0x18, 0x19, 0x1A, 0x1B, |
|
109 |
+ 0x1C, 0x1D, 0x1E, 0x1F}; |
|
110 |
+ } |
|
111 |
+ |
|
112 |
+ vsrcAuc = vec_ld(0, src); |
|
113 |
+ |
|
114 |
+ if (loadSecond) |
|
115 |
+ vsrcBuc = vec_ld(16, src); |
|
116 |
+ vsrcperm0 = vec_lvsl(0, src); |
|
117 |
+ vsrcperm1 = vec_lvsl(1, src); |
|
118 |
+ |
|
119 |
+ vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0); |
|
120 |
+ if (reallyBadAlign) |
|
121 |
+ vsrc1uc = vsrcBuc; |
|
122 |
+ else |
|
123 |
+ vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1); |
|
124 |
+ |
|
125 |
+ vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc0uc); |
|
126 |
+ vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc1uc); |
|
127 |
+ |
|
128 |
+ if (ABCD[3]) { |
|
129 |
+ if (!loadSecond) {// -> !reallyBadAlign |
|
130 |
+ for (i = 0 ; i < h ; i++) { |
|
131 |
+ vsrcCuc = vec_ld(stride + 0, src); |
|
132 |
+ vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0); |
|
133 |
+ vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1); |
|
134 |
+ |
|
135 |
+ CHROMA_MC8_ALTIVEC_CORE(v32ss, noop) |
|
136 |
+ } |
|
137 |
+ } else { |
|
138 |
+ vec_u8 vsrcDuc; |
|
139 |
+ for (i = 0 ; i < h ; i++) { |
|
140 |
+ vsrcCuc = vec_ld(stride + 0, src); |
|
141 |
+ vsrcDuc = vec_ld(stride + 16, src); |
|
142 |
+ vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0); |
|
143 |
+ if (reallyBadAlign) |
|
144 |
+ vsrc3uc = vsrcDuc; |
|
145 |
+ else |
|
146 |
+ vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1); |
|
147 |
+ |
|
148 |
+ CHROMA_MC8_ALTIVEC_CORE(v32ss, noop) |
|
149 |
+ } |
|
150 |
+ } |
|
151 |
+ } else { |
|
152 |
+ const vec_s16 vE = vec_add(vB, vC); |
|
153 |
+ if (ABCD[2]) { // x == 0 B == 0 |
|
154 |
+ if (!loadSecond) {// -> !reallyBadAlign |
|
155 |
+ for (i = 0 ; i < h ; i++) { |
|
156 |
+ vsrcCuc = vec_ld(stride + 0, src); |
|
157 |
+ vsrc1uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0); |
|
158 |
+ CHROMA_MC8_ALTIVEC_CORE_SIMPLE |
|
159 |
+ |
|
160 |
+ vsrc0uc = vsrc1uc; |
|
161 |
+ } |
|
162 |
+ } else { |
|
163 |
+ vec_u8 vsrcDuc; |
|
164 |
+ for (i = 0 ; i < h ; i++) { |
|
165 |
+ vsrcCuc = vec_ld(stride + 0, src); |
|
166 |
+ vsrcDuc = vec_ld(stride + 15, src); |
|
167 |
+ vsrc1uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0); |
|
168 |
+ CHROMA_MC8_ALTIVEC_CORE_SIMPLE |
|
169 |
+ |
|
170 |
+ vsrc0uc = vsrc1uc; |
|
171 |
+ } |
|
172 |
+ } |
|
173 |
+ } else { // y == 0 C == 0 |
|
174 |
+ if (!loadSecond) {// -> !reallyBadAlign |
|
175 |
+ for (i = 0 ; i < h ; i++) { |
|
176 |
+ vsrcCuc = vec_ld(0, src); |
|
177 |
+ vsrc0uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0); |
|
178 |
+ vsrc1uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1); |
|
179 |
+ |
|
180 |
+ CHROMA_MC8_ALTIVEC_CORE_SIMPLE |
|
181 |
+ } |
|
182 |
+ } else { |
|
183 |
+ vec_u8 vsrcDuc; |
|
184 |
+ for (i = 0 ; i < h ; i++) { |
|
185 |
+ vsrcCuc = vec_ld(0, src); |
|
186 |
+ vsrcDuc = vec_ld(15, src); |
|
187 |
+ vsrc0uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0); |
|
188 |
+ if (reallyBadAlign) |
|
189 |
+ vsrc1uc = vsrcDuc; |
|
190 |
+ else |
|
191 |
+ vsrc1uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1); |
|
192 |
+ |
|
193 |
+ CHROMA_MC8_ALTIVEC_CORE_SIMPLE |
|
194 |
+ } |
|
195 |
+ } |
|
196 |
+ } |
|
197 |
+ } |
|
198 |
+} |
|
199 |
+#endif |
|
200 |
+ |
|
201 |
+/* this code assume that stride % 16 == 0 */ |
|
202 |
+#ifdef PREFIX_no_rnd_vc1_chroma_mc8_altivec |
|
203 |
+static void PREFIX_no_rnd_vc1_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) { |
|
204 |
+ DECLARE_ALIGNED(16, signed int, ABCD)[4] = |
|
205 |
+ {((8 - x) * (8 - y)), |
|
206 |
+ (( x) * (8 - y)), |
|
207 |
+ ((8 - x) * ( y)), |
|
208 |
+ (( x) * ( y))}; |
|
209 |
+ register int i; |
|
210 |
+ vec_u8 fperm; |
|
211 |
+ const vec_s32 vABCD = vec_ld(0, ABCD); |
|
212 |
+ const vec_s16 vA = vec_splat((vec_s16)vABCD, 1); |
|
213 |
+ const vec_s16 vB = vec_splat((vec_s16)vABCD, 3); |
|
214 |
+ const vec_s16 vC = vec_splat((vec_s16)vABCD, 5); |
|
215 |
+ const vec_s16 vD = vec_splat((vec_s16)vABCD, 7); |
|
216 |
+ LOAD_ZERO; |
|
217 |
+ const vec_s16 v28ss = vec_sub(vec_sl(vec_splat_s16(1),vec_splat_u16(5)),vec_splat_s16(4)); |
|
218 |
+ const vec_u16 v6us = vec_splat_u16(6); |
|
219 |
+ register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1; |
|
220 |
+ register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0; |
|
221 |
+ |
|
222 |
+ vec_u8 vsrcAuc, av_uninit(vsrcBuc), vsrcperm0, vsrcperm1; |
|
223 |
+ vec_u8 vsrc0uc, vsrc1uc; |
|
224 |
+ vec_s16 vsrc0ssH, vsrc1ssH; |
|
225 |
+ vec_u8 vsrcCuc, vsrc2uc, vsrc3uc; |
|
226 |
+ vec_s16 vsrc2ssH, vsrc3ssH, psum; |
|
227 |
+ vec_u8 vdst, ppsum, vfdst, fsum; |
|
228 |
+ |
|
229 |
+ if (((unsigned long)dst) % 16 == 0) { |
|
230 |
+ fperm = (vec_u8){0x10, 0x11, 0x12, 0x13, |
|
231 |
+ 0x14, 0x15, 0x16, 0x17, |
|
232 |
+ 0x08, 0x09, 0x0A, 0x0B, |
|
233 |
+ 0x0C, 0x0D, 0x0E, 0x0F}; |
|
234 |
+ } else { |
|
235 |
+ fperm = (vec_u8){0x00, 0x01, 0x02, 0x03, |
|
236 |
+ 0x04, 0x05, 0x06, 0x07, |
|
237 |
+ 0x18, 0x19, 0x1A, 0x1B, |
|
238 |
+ 0x1C, 0x1D, 0x1E, 0x1F}; |
|
239 |
+ } |
|
240 |
+ |
|
241 |
+ vsrcAuc = vec_ld(0, src); |
|
242 |
+ |
|
243 |
+ if (loadSecond) |
|
244 |
+ vsrcBuc = vec_ld(16, src); |
|
245 |
+ vsrcperm0 = vec_lvsl(0, src); |
|
246 |
+ vsrcperm1 = vec_lvsl(1, src); |
|
247 |
+ |
|
248 |
+ vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0); |
|
249 |
+ if (reallyBadAlign) |
|
250 |
+ vsrc1uc = vsrcBuc; |
|
251 |
+ else |
|
252 |
+ vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1); |
|
253 |
+ |
|
254 |
+ vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v, (vec_u8)vsrc0uc); |
|
255 |
+ vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v, (vec_u8)vsrc1uc); |
|
256 |
+ |
|
257 |
+ if (!loadSecond) {// -> !reallyBadAlign |
|
258 |
+ for (i = 0 ; i < h ; i++) { |
|
259 |
+ |
|
260 |
+ |
|
261 |
+ vsrcCuc = vec_ld(stride + 0, src); |
|
262 |
+ |
|
263 |
+ vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0); |
|
264 |
+ vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1); |
|
265 |
+ |
|
266 |
+ CHROMA_MC8_ALTIVEC_CORE(vec_splat_s16(0), add28) |
|
267 |
+ } |
|
268 |
+ } else { |
|
269 |
+ vec_u8 vsrcDuc; |
|
270 |
+ for (i = 0 ; i < h ; i++) { |
|
271 |
+ vsrcCuc = vec_ld(stride + 0, src); |
|
272 |
+ vsrcDuc = vec_ld(stride + 16, src); |
|
273 |
+ |
|
274 |
+ vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0); |
|
275 |
+ if (reallyBadAlign) |
|
276 |
+ vsrc3uc = vsrcDuc; |
|
277 |
+ else |
|
278 |
+ vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1); |
|
279 |
+ |
|
280 |
+ CHROMA_MC8_ALTIVEC_CORE(vec_splat_s16(0), add28) |
|
281 |
+ } |
|
282 |
+ } |
|
283 |
+} |
|
284 |
+#endif |
|
285 |
+ |
|
286 |
+#undef noop |
|
287 |
+#undef add28 |
|
288 |
+#undef CHROMA_MC8_ALTIVEC_CORE |
... | ... |
@@ -326,13 +326,13 @@ static void vc1_inv_trans_8x4_altivec(uint8_t *dest, int stride, int16_t *block) |
326 | 326 |
|
327 | 327 |
#define OP_U8_ALTIVEC PUT_OP_U8_ALTIVEC |
328 | 328 |
#define PREFIX_no_rnd_vc1_chroma_mc8_altivec put_no_rnd_vc1_chroma_mc8_altivec |
329 |
-#include "h264_qpel_template.c" |
|
329 |
+#include "h264chroma_template.c" |
|
330 | 330 |
#undef OP_U8_ALTIVEC |
331 | 331 |
#undef PREFIX_no_rnd_vc1_chroma_mc8_altivec |
332 | 332 |
|
333 | 333 |
#define OP_U8_ALTIVEC AVG_OP_U8_ALTIVEC |
334 | 334 |
#define PREFIX_no_rnd_vc1_chroma_mc8_altivec avg_no_rnd_vc1_chroma_mc8_altivec |
335 |
-#include "h264_qpel_template.c" |
|
335 |
+#include "h264chroma_template.c" |
|
336 | 336 |
#undef OP_U8_ALTIVEC |
337 | 337 |
#undef PREFIX_no_rnd_vc1_chroma_mc8_altivec |
338 | 338 |
|
... | ... |
@@ -26,6 +26,7 @@ |
26 | 26 |
|
27 | 27 |
#include "avcodec.h" |
28 | 28 |
#include "dsputil.h" |
29 |
+#include "h264chroma.h" |
|
29 | 30 |
#include "h264qpel.h" |
30 | 31 |
#include "rv34dsp.h" |
31 | 32 |
|
... | ... |
@@ -254,9 +255,11 @@ RV30_MC(avg_, 8) |
254 | 254 |
RV30_MC(avg_, 16) |
255 | 255 |
|
256 | 256 |
av_cold void ff_rv30dsp_init(RV34DSPContext *c, DSPContext* dsp) { |
257 |
+ H264ChromaContext h264chroma; |
|
257 | 258 |
H264QpelContext qpel; |
258 | 259 |
|
259 | 260 |
ff_rv34dsp_init(c, dsp); |
261 |
+ ff_h264chroma_init(&h264chroma, 8); |
|
260 | 262 |
ff_h264qpel_init(&qpel, 8); |
261 | 263 |
|
262 | 264 |
c->put_pixels_tab[0][ 0] = qpel.put_h264_qpel_pixels_tab[0][0]; |
... | ... |
@@ -296,8 +299,8 @@ av_cold void ff_rv30dsp_init(RV34DSPContext *c, DSPContext* dsp) { |
296 | 296 |
c->avg_pixels_tab[1][ 9] = avg_rv30_tpel8_mc12_c; |
297 | 297 |
c->avg_pixels_tab[1][10] = avg_rv30_tpel8_mc22_c; |
298 | 298 |
|
299 |
- c->put_chroma_pixels_tab[0] = dsp->put_h264_chroma_pixels_tab[0]; |
|
300 |
- c->put_chroma_pixels_tab[1] = dsp->put_h264_chroma_pixels_tab[1]; |
|
301 |
- c->avg_chroma_pixels_tab[0] = dsp->avg_h264_chroma_pixels_tab[0]; |
|
302 |
- c->avg_chroma_pixels_tab[1] = dsp->avg_h264_chroma_pixels_tab[1]; |
|
299 |
+ c->put_chroma_pixels_tab[0] = h264chroma.put_h264_chroma_pixels_tab[0]; |
|
300 |
+ c->put_chroma_pixels_tab[1] = h264chroma.put_h264_chroma_pixels_tab[1]; |
|
301 |
+ c->avg_chroma_pixels_tab[0] = h264chroma.avg_h264_chroma_pixels_tab[0]; |
|
302 |
+ c->avg_chroma_pixels_tab[1] = h264chroma.avg_h264_chroma_pixels_tab[1]; |
|
303 | 303 |
} |
... | ... |
@@ -369,14 +369,6 @@ av_cold void ff_dsputil_init_align(DSPContext *c, AVCodecContext *avctx) |
369 | 369 |
/* dspfunc(avg_no_rnd_qpel, 1, 8); */ |
370 | 370 |
|
371 | 371 |
#undef dspfunc |
372 |
- if (!high_bit_depth) { |
|
373 |
- c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_sh4; |
|
374 |
- c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_sh4; |
|
375 |
- c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_sh4; |
|
376 |
- c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_sh4; |
|
377 |
- c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_sh4; |
|
378 |
- c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_sh4; |
|
379 |
- } |
|
380 | 372 |
|
381 | 373 |
c->put_mspel_pixels_tab[0]= put_mspel8_mc00_sh4; |
382 | 374 |
c->put_mspel_pixels_tab[1]= put_mspel8_mc10_sh4; |
383 | 375 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,132 @@ |
0 |
+/* |
|
1 |
+ * aligned/packed access motion |
|
2 |
+ * |
|
3 |
+ * Copyright (c) 2001-2003 BERO <bero@geocities.co.jp> |
|
4 |
+ * |
|
5 |
+ * This file is part of Libav. |
|
6 |
+ * |
|
7 |
+ * Libav is free software; you can redistribute it and/or |
|
8 |
+ * modify it under the terms of the GNU Lesser General Public |
|
9 |
+ * License as published by the Free Software Foundation; either |
|
10 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
11 |
+ * |
|
12 |
+ * Libav is distributed in the hope that it will be useful, |
|
13 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
14 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
15 |
+ * Lesser General Public License for more details. |
|
16 |
+ * |
|
17 |
+ * You should have received a copy of the GNU Lesser General Public |
|
18 |
+ * License along with Libav; if not, write to the Free Software |
|
19 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
20 |
+ */ |
|
21 |
+ |
|
22 |
+#include <assert.h> |
|
23 |
+#include <stdint.h> |
|
24 |
+ |
|
25 |
+#include "libavutil/attributes.h" |
|
26 |
+#include "libavcodec/h264chroma.h" |
|
27 |
+ |
|
28 |
+#define H264_CHROMA_MC(OPNAME, OP)\ |
|
29 |
+static void OPNAME ## h264_chroma_mc2_sh4(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ |
|
30 |
+ const int A=(8-x)*(8-y);\ |
|
31 |
+ const int B=( x)*(8-y);\ |
|
32 |
+ const int C=(8-x)*( y);\ |
|
33 |
+ const int D=( x)*( y);\ |
|
34 |
+ \ |
|
35 |
+ assert(x<8 && y<8 && x>=0 && y>=0);\ |
|
36 |
+\ |
|
37 |
+ do {\ |
|
38 |
+ int t0,t1,t2,t3; \ |
|
39 |
+ uint8_t *s0 = src; \ |
|
40 |
+ uint8_t *s1 = src+stride; \ |
|
41 |
+ t0 = *s0++; t2 = *s1++; \ |
|
42 |
+ t1 = *s0++; t3 = *s1++; \ |
|
43 |
+ OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\ |
|
44 |
+ t0 = *s0++; t2 = *s1++; \ |
|
45 |
+ OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\ |
|
46 |
+ dst+= stride;\ |
|
47 |
+ src+= stride;\ |
|
48 |
+ }while(--h);\ |
|
49 |
+}\ |
|
50 |
+\ |
|
51 |
+static void OPNAME ## h264_chroma_mc4_sh4(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ |
|
52 |
+ const int A=(8-x)*(8-y);\ |
|
53 |
+ const int B=( x)*(8-y);\ |
|
54 |
+ const int C=(8-x)*( y);\ |
|
55 |
+ const int D=( x)*( y);\ |
|
56 |
+ \ |
|
57 |
+ assert(x<8 && y<8 && x>=0 && y>=0);\ |
|
58 |
+\ |
|
59 |
+ do {\ |
|
60 |
+ int t0,t1,t2,t3; \ |
|
61 |
+ uint8_t *s0 = src; \ |
|
62 |
+ uint8_t *s1 = src+stride; \ |
|
63 |
+ t0 = *s0++; t2 = *s1++; \ |
|
64 |
+ t1 = *s0++; t3 = *s1++; \ |
|
65 |
+ OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\ |
|
66 |
+ t0 = *s0++; t2 = *s1++; \ |
|
67 |
+ OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\ |
|
68 |
+ t1 = *s0++; t3 = *s1++; \ |
|
69 |
+ OP(dst[2], (A*t0 + B*t1 + C*t2 + D*t3));\ |
|
70 |
+ t0 = *s0++; t2 = *s1++; \ |
|
71 |
+ OP(dst[3], (A*t1 + B*t0 + C*t3 + D*t2));\ |
|
72 |
+ dst+= stride;\ |
|
73 |
+ src+= stride;\ |
|
74 |
+ }while(--h);\ |
|
75 |
+}\ |
|
76 |
+\ |
|
77 |
+static void OPNAME ## h264_chroma_mc8_sh4(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ |
|
78 |
+ const int A=(8-x)*(8-y);\ |
|
79 |
+ const int B=( x)*(8-y);\ |
|
80 |
+ const int C=(8-x)*( y);\ |
|
81 |
+ const int D=( x)*( y);\ |
|
82 |
+ \ |
|
83 |
+ assert(x<8 && y<8 && x>=0 && y>=0);\ |
|
84 |
+\ |
|
85 |
+ do {\ |
|
86 |
+ int t0,t1,t2,t3; \ |
|
87 |
+ uint8_t *s0 = src; \ |
|
88 |
+ uint8_t *s1 = src+stride; \ |
|
89 |
+ t0 = *s0++; t2 = *s1++; \ |
|
90 |
+ t1 = *s0++; t3 = *s1++; \ |
|
91 |
+ OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\ |
|
92 |
+ t0 = *s0++; t2 = *s1++; \ |
|
93 |
+ OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\ |
|
94 |
+ t1 = *s0++; t3 = *s1++; \ |
|
95 |
+ OP(dst[2], (A*t0 + B*t1 + C*t2 + D*t3));\ |
|
96 |
+ t0 = *s0++; t2 = *s1++; \ |
|
97 |
+ OP(dst[3], (A*t1 + B*t0 + C*t3 + D*t2));\ |
|
98 |
+ t1 = *s0++; t3 = *s1++; \ |
|
99 |
+ OP(dst[4], (A*t0 + B*t1 + C*t2 + D*t3));\ |
|
100 |
+ t0 = *s0++; t2 = *s1++; \ |
|
101 |
+ OP(dst[5], (A*t1 + B*t0 + C*t3 + D*t2));\ |
|
102 |
+ t1 = *s0++; t3 = *s1++; \ |
|
103 |
+ OP(dst[6], (A*t0 + B*t1 + C*t2 + D*t3));\ |
|
104 |
+ t0 = *s0++; t2 = *s1++; \ |
|
105 |
+ OP(dst[7], (A*t1 + B*t0 + C*t3 + D*t2));\ |
|
106 |
+ dst+= stride;\ |
|
107 |
+ src+= stride;\ |
|
108 |
+ }while(--h);\ |
|
109 |
+} |
|
110 |
+ |
|
111 |
+#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1) |
|
112 |
+#define op_put(a, b) a = (((b) + 32)>>6) |
|
113 |
+ |
|
114 |
+H264_CHROMA_MC(put_ , op_put) |
|
115 |
+H264_CHROMA_MC(avg_ , op_avg) |
|
116 |
+#undef op_avg |
|
117 |
+#undef op_put |
|
118 |
+ |
|
119 |
+av_cold void ff_h264chroma_init_sh4(H264ChromaContext *c, int bit_depth) |
|
120 |
+{ |
|
121 |
+ const int high_bit_depth = bit_depth > 8; |
|
122 |
+ |
|
123 |
+ if (!high_bit_depth) { |
|
124 |
+ c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_sh4; |
|
125 |
+ c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_sh4; |
|
126 |
+ c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_sh4; |
|
127 |
+ c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_sh4; |
|
128 |
+ c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_sh4; |
|
129 |
+ c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_sh4; |
|
130 |
+ } |
|
131 |
+} |
... | ... |
@@ -359,97 +359,6 @@ static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y |
359 | 359 |
}while(--h); |
360 | 360 |
} |
361 | 361 |
|
362 |
-#define H264_CHROMA_MC(OPNAME, OP)\ |
|
363 |
-static void OPNAME ## h264_chroma_mc2_sh4(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ |
|
364 |
- const int A=(8-x)*(8-y);\ |
|
365 |
- const int B=( x)*(8-y);\ |
|
366 |
- const int C=(8-x)*( y);\ |
|
367 |
- const int D=( x)*( y);\ |
|
368 |
- \ |
|
369 |
- assert(x<8 && y<8 && x>=0 && y>=0);\ |
|
370 |
-\ |
|
371 |
- do {\ |
|
372 |
- int t0,t1,t2,t3; \ |
|
373 |
- uint8_t *s0 = src; \ |
|
374 |
- uint8_t *s1 = src+stride; \ |
|
375 |
- t0 = *s0++; t2 = *s1++; \ |
|
376 |
- t1 = *s0++; t3 = *s1++; \ |
|
377 |
- OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\ |
|
378 |
- t0 = *s0++; t2 = *s1++; \ |
|
379 |
- OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\ |
|
380 |
- dst+= stride;\ |
|
381 |
- src+= stride;\ |
|
382 |
- }while(--h);\ |
|
383 |
-}\ |
|
384 |
-\ |
|
385 |
-static void OPNAME ## h264_chroma_mc4_sh4(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ |
|
386 |
- const int A=(8-x)*(8-y);\ |
|
387 |
- const int B=( x)*(8-y);\ |
|
388 |
- const int C=(8-x)*( y);\ |
|
389 |
- const int D=( x)*( y);\ |
|
390 |
- \ |
|
391 |
- assert(x<8 && y<8 && x>=0 && y>=0);\ |
|
392 |
-\ |
|
393 |
- do {\ |
|
394 |
- int t0,t1,t2,t3; \ |
|
395 |
- uint8_t *s0 = src; \ |
|
396 |
- uint8_t *s1 = src+stride; \ |
|
397 |
- t0 = *s0++; t2 = *s1++; \ |
|
398 |
- t1 = *s0++; t3 = *s1++; \ |
|
399 |
- OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\ |
|
400 |
- t0 = *s0++; t2 = *s1++; \ |
|
401 |
- OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\ |
|
402 |
- t1 = *s0++; t3 = *s1++; \ |
|
403 |
- OP(dst[2], (A*t0 + B*t1 + C*t2 + D*t3));\ |
|
404 |
- t0 = *s0++; t2 = *s1++; \ |
|
405 |
- OP(dst[3], (A*t1 + B*t0 + C*t3 + D*t2));\ |
|
406 |
- dst+= stride;\ |
|
407 |
- src+= stride;\ |
|
408 |
- }while(--h);\ |
|
409 |
-}\ |
|
410 |
-\ |
|
411 |
-static void OPNAME ## h264_chroma_mc8_sh4(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ |
|
412 |
- const int A=(8-x)*(8-y);\ |
|
413 |
- const int B=( x)*(8-y);\ |
|
414 |
- const int C=(8-x)*( y);\ |
|
415 |
- const int D=( x)*( y);\ |
|
416 |
- \ |
|
417 |
- assert(x<8 && y<8 && x>=0 && y>=0);\ |
|
418 |
-\ |
|
419 |
- do {\ |
|
420 |
- int t0,t1,t2,t3; \ |
|
421 |
- uint8_t *s0 = src; \ |
|
422 |
- uint8_t *s1 = src+stride; \ |
|
423 |
- t0 = *s0++; t2 = *s1++; \ |
|
424 |
- t1 = *s0++; t3 = *s1++; \ |
|
425 |
- OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\ |
|
426 |
- t0 = *s0++; t2 = *s1++; \ |
|
427 |
- OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\ |
|
428 |
- t1 = *s0++; t3 = *s1++; \ |
|
429 |
- OP(dst[2], (A*t0 + B*t1 + C*t2 + D*t3));\ |
|
430 |
- t0 = *s0++; t2 = *s1++; \ |
|
431 |
- OP(dst[3], (A*t1 + B*t0 + C*t3 + D*t2));\ |
|
432 |
- t1 = *s0++; t3 = *s1++; \ |
|
433 |
- OP(dst[4], (A*t0 + B*t1 + C*t2 + D*t3));\ |
|
434 |
- t0 = *s0++; t2 = *s1++; \ |
|
435 |
- OP(dst[5], (A*t1 + B*t0 + C*t3 + D*t2));\ |
|
436 |
- t1 = *s0++; t3 = *s1++; \ |
|
437 |
- OP(dst[6], (A*t0 + B*t1 + C*t2 + D*t3));\ |
|
438 |
- t0 = *s0++; t2 = *s1++; \ |
|
439 |
- OP(dst[7], (A*t1 + B*t0 + C*t3 + D*t2));\ |
|
440 |
- dst+= stride;\ |
|
441 |
- src+= stride;\ |
|
442 |
- }while(--h);\ |
|
443 |
-} |
|
444 |
- |
|
445 |
-#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1) |
|
446 |
-#define op_put(a, b) a = (((b) + 32)>>6) |
|
447 |
- |
|
448 |
-H264_CHROMA_MC(put_ , op_put) |
|
449 |
-H264_CHROMA_MC(avg_ , op_avg) |
|
450 |
-#undef op_avg |
|
451 |
-#undef op_put |
|
452 |
- |
|
453 | 362 |
#define QPEL_MC(r, OPNAME, RND, OP) \ |
454 | 363 |
static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ |
455 | 364 |
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
... | ... |
@@ -24,6 +24,7 @@ |
24 | 24 |
#define AVCODEC_VC1_H |
25 | 25 |
|
26 | 26 |
#include "avcodec.h" |
27 |
+#include "h264chroma.h" |
|
27 | 28 |
#include "mpegvideo.h" |
28 | 29 |
#include "intrax8.h" |
29 | 30 |
#include "vc1dsp.h" |
... | ... |
@@ -181,6 +182,7 @@ enum FrameCodingMode { |
181 | 181 |
typedef struct VC1Context{ |
182 | 182 |
MpegEncContext s; |
183 | 183 |
IntraX8Context x8; |
184 |
+ H264ChromaContext h264chroma; |
|
184 | 185 |
VC1DSPContext vc1dsp; |
185 | 186 |
|
186 | 187 |
int bits; |
... | ... |
@@ -31,6 +31,7 @@ |
31 | 31 |
#include "avcodec.h" |
32 | 32 |
#include "mpegvideo.h" |
33 | 33 |
#include "h263.h" |
34 |
+#include "h264chroma.h" |
|
34 | 35 |
#include "vc1.h" |
35 | 36 |
#include "vc1data.h" |
36 | 37 |
#include "vc1acdata.h" |
... | ... |
@@ -331,6 +332,7 @@ static void vc1_mc_1mv(VC1Context *v, int dir) |
331 | 331 |
{ |
332 | 332 |
MpegEncContext *s = &v->s; |
333 | 333 |
DSPContext *dsp = &v->s.dsp; |
334 |
+ H264ChromaContext *h264chroma = &v->h264chroma; |
|
334 | 335 |
uint8_t *srcY, *srcU, *srcV; |
335 | 336 |
int dxy, mx, my, uvmx, uvmy, src_x, src_y, uvsrc_x, uvsrc_y; |
336 | 337 |
int off, off_uv; |
... | ... |
@@ -519,8 +521,8 @@ static void vc1_mc_1mv(VC1Context *v, int dir) |
519 | 519 |
uvmx = (uvmx & 3) << 1; |
520 | 520 |
uvmy = (uvmy & 3) << 1; |
521 | 521 |
if (!v->rnd) { |
522 |
- dsp->put_h264_chroma_pixels_tab[0](s->dest[1] + off_uv, srcU, s->uvlinesize, 8, uvmx, uvmy); |
|
523 |
- dsp->put_h264_chroma_pixels_tab[0](s->dest[2] + off_uv, srcV, s->uvlinesize, 8, uvmx, uvmy); |
|
522 |
+ h264chroma->put_h264_chroma_pixels_tab[0](s->dest[1] + off_uv, srcU, s->uvlinesize, 8, uvmx, uvmy); |
|
523 |
+ h264chroma->put_h264_chroma_pixels_tab[0](s->dest[2] + off_uv, srcV, s->uvlinesize, 8, uvmx, uvmy); |
|
524 | 524 |
} else { |
525 | 525 |
v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1] + off_uv, srcU, s->uvlinesize, 8, uvmx, uvmy); |
526 | 526 |
v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2] + off_uv, srcV, s->uvlinesize, 8, uvmx, uvmy); |
... | ... |
@@ -769,7 +771,7 @@ static av_always_inline int get_chroma_mv(int *mvx, int *mvy, int *a, int flag, |
769 | 769 |
static void vc1_mc_4mv_chroma(VC1Context *v, int dir) |
770 | 770 |
{ |
771 | 771 |
MpegEncContext *s = &v->s; |
772 |
- DSPContext *dsp = &v->s.dsp; |
|
772 |
+ H264ChromaContext *h264chroma = &v->h264chroma; |
|
773 | 773 |
uint8_t *srcU, *srcV; |
774 | 774 |
int uvmx, uvmy, uvsrc_x, uvsrc_y; |
775 | 775 |
int k, tx = 0, ty = 0; |
... | ... |
@@ -915,8 +917,8 @@ static void vc1_mc_4mv_chroma(VC1Context *v, int dir) |
915 | 915 |
uvmx = (uvmx & 3) << 1; |
916 | 916 |
uvmy = (uvmy & 3) << 1; |
917 | 917 |
if (!v->rnd) { |
918 |
- dsp->put_h264_chroma_pixels_tab[0](s->dest[1] + off, srcU, s->uvlinesize, 8, uvmx, uvmy); |
|
919 |
- dsp->put_h264_chroma_pixels_tab[0](s->dest[2] + off, srcV, s->uvlinesize, 8, uvmx, uvmy); |
|
918 |
+ h264chroma->put_h264_chroma_pixels_tab[0](s->dest[1] + off, srcU, s->uvlinesize, 8, uvmx, uvmy); |
|
919 |
+ h264chroma->put_h264_chroma_pixels_tab[0](s->dest[2] + off, srcV, s->uvlinesize, 8, uvmx, uvmy); |
|
920 | 920 |
} else { |
921 | 921 |
v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1] + off, srcU, s->uvlinesize, 8, uvmx, uvmy); |
922 | 922 |
v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2] + off, srcV, s->uvlinesize, 8, uvmx, uvmy); |
... | ... |
@@ -928,7 +930,7 @@ static void vc1_mc_4mv_chroma(VC1Context *v, int dir) |
928 | 928 |
static void vc1_mc_4mv_chroma4(VC1Context *v) |
929 | 929 |
{ |
930 | 930 |
MpegEncContext *s = &v->s; |
931 |
- DSPContext *dsp = &v->s.dsp; |
|
931 |
+ H264ChromaContext *h264chroma = &v->h264chroma; |
|
932 | 932 |
uint8_t *srcU, *srcV; |
933 | 933 |
int uvsrc_x, uvsrc_y; |
934 | 934 |
int uvmx_field[4], uvmy_field[4]; |
... | ... |
@@ -1000,8 +1002,8 @@ static void vc1_mc_4mv_chroma4(VC1Context *v) |
1000 | 1000 |
} |
1001 | 1001 |
} |
1002 | 1002 |
if (!v->rnd) { |
1003 |
- dsp->put_h264_chroma_pixels_tab[1](s->dest[1] + off, srcU, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]); |
|
1004 |
- dsp->put_h264_chroma_pixels_tab[1](s->dest[2] + off, srcV, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]); |
|
1003 |
+ h264chroma->put_h264_chroma_pixels_tab[1](s->dest[1] + off, srcU, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]); |
|
1004 |
+ h264chroma->put_h264_chroma_pixels_tab[1](s->dest[2] + off, srcV, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]); |
|
1005 | 1005 |
} else { |
1006 | 1006 |
v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[1](s->dest[1] + off, srcU, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]); |
1007 | 1007 |
v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[1](s->dest[2] + off, srcV, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]); |
... | ... |
@@ -1828,6 +1830,7 @@ static void vc1_interp_mc(VC1Context *v) |
1828 | 1828 |
{ |
1829 | 1829 |
MpegEncContext *s = &v->s; |
1830 | 1830 |
DSPContext *dsp = &v->s.dsp; |
1831 |
+ H264ChromaContext *h264chroma = &v->h264chroma; |
|
1831 | 1832 |
uint8_t *srcY, *srcU, *srcV; |
1832 | 1833 |
int dxy, mx, my, uvmx, uvmy, src_x, src_y, uvsrc_x, uvsrc_y; |
1833 | 1834 |
int off, off_uv; |
... | ... |
@@ -1957,8 +1960,8 @@ static void vc1_interp_mc(VC1Context *v) |
1957 | 1957 |
uvmx = (uvmx & 3) << 1; |
1958 | 1958 |
uvmy = (uvmy & 3) << 1; |
1959 | 1959 |
if (!v->rnd) { |
1960 |
- dsp->avg_h264_chroma_pixels_tab[0](s->dest[1] + off_uv, srcU, s->uvlinesize, 8, uvmx, uvmy); |
|
1961 |
- dsp->avg_h264_chroma_pixels_tab[0](s->dest[2] + off_uv, srcV, s->uvlinesize, 8, uvmx, uvmy); |
|
1960 |
+ h264chroma->avg_h264_chroma_pixels_tab[0](s->dest[1] + off_uv, srcU, s->uvlinesize, 8, uvmx, uvmy); |
|
1961 |
+ h264chroma->avg_h264_chroma_pixels_tab[0](s->dest[2] + off_uv, srcV, s->uvlinesize, 8, uvmx, uvmy); |
|
1962 | 1962 |
} else { |
1963 | 1963 |
v->vc1dsp.avg_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1] + off_uv, srcU, s->uvlinesize, 8, uvmx, uvmy); |
1964 | 1964 |
v->vc1dsp.avg_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2] + off_uv, srcV, s->uvlinesize, 8, uvmx, uvmy); |
... | ... |
@@ -5164,6 +5167,7 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx) |
5164 | 5164 |
|
5165 | 5165 |
if (ff_vc1_init_common(v) < 0) |
5166 | 5166 |
return -1; |
5167 |
+ ff_h264chroma_init(&v->h264chroma, 8); |
|
5167 | 5168 |
ff_vc1dsp_init(&v->vc1dsp); |
5168 | 5169 |
|
5169 | 5170 |
if (avctx->codec_id == AV_CODEC_ID_WMV3 || avctx->codec_id == AV_CODEC_ID_WMV3IMAGE) { |
... | ... |
@@ -26,7 +26,7 @@ |
26 | 26 |
#include "avcodec.h" |
27 | 27 |
#include "bytestream.h" |
28 | 28 |
#include "internal.h" |
29 |
- |
|
29 |
+#include "h264chroma.h" |
|
30 | 30 |
#include "vp56.h" |
31 | 31 |
#include "vp56data.h" |
32 | 32 |
|
... | ... |
@@ -674,6 +674,7 @@ av_cold void ff_vp56_init(AVCodecContext *avctx, int flip, int has_alpha) |
674 | 674 |
avctx->pix_fmt = has_alpha ? AV_PIX_FMT_YUVA420P : AV_PIX_FMT_YUV420P; |
675 | 675 |
|
676 | 676 |
ff_dsputil_init(&s->dsp, avctx); |
677 |
+ ff_h264chroma_init(&s->h264chroma, 8); |
|
677 | 678 |
ff_videodsp_init(&s->vdsp, 8); |
678 | 679 |
ff_vp3dsp_init(&s->vp3dsp, avctx->flags); |
679 | 680 |
ff_vp56dsp_init(&s->vp56dsp, avctx->codec->id); |
... | ... |
@@ -30,6 +30,7 @@ |
30 | 30 |
#include "dsputil.h" |
31 | 31 |
#include "get_bits.h" |
32 | 32 |
#include "bytestream.h" |
33 |
+#include "h264chroma.h" |
|
33 | 34 |
#include "videodsp.h" |
34 | 35 |
#include "vp3dsp.h" |
35 | 36 |
#include "vp56dsp.h" |
... | ... |
@@ -95,6 +96,7 @@ typedef struct VP56Model { |
95 | 95 |
struct vp56_context { |
96 | 96 |
AVCodecContext *avctx; |
97 | 97 |
DSPContext dsp; |
98 |
+ H264ChromaContext h264chroma; |
|
98 | 99 |
VideoDSPContext vdsp; |
99 | 100 |
VP3DSPContext vp3dsp; |
100 | 101 |
VP56DSPContext vp56dsp; |
... | ... |
@@ -536,8 +536,8 @@ static void vp6_filter_diag2(VP56Context *s, uint8_t *dst, uint8_t *src, |
536 | 536 |
int stride, int h_weight, int v_weight) |
537 | 537 |
{ |
538 | 538 |
uint8_t *tmp = s->edge_emu_buffer+16; |
539 |
- s->dsp.put_h264_chroma_pixels_tab[0](tmp, src, stride, 9, h_weight, 0); |
|
540 |
- s->dsp.put_h264_chroma_pixels_tab[0](dst, tmp, stride, 8, 0, v_weight); |
|
539 |
+ s->h264chroma.put_h264_chroma_pixels_tab[0](tmp, src, stride, 9, h_weight, 0); |
|
540 |
+ s->h264chroma.put_h264_chroma_pixels_tab[0](dst, tmp, stride, 8, 0, v_weight); |
|
541 | 541 |
} |
542 | 542 |
|
543 | 543 |
static void vp6_filter(VP56Context *s, uint8_t *dst, uint8_t *src, |
... | ... |
@@ -583,7 +583,7 @@ static void vp6_filter(VP56Context *s, uint8_t *dst, uint8_t *src, |
583 | 583 |
} |
584 | 584 |
} else { |
585 | 585 |
if (!x8 || !y8) { |
586 |
- s->dsp.put_h264_chroma_pixels_tab[0](dst, src+offset1, stride, 8, x8, y8); |
|
586 |
+ s->h264chroma.put_h264_chroma_pixels_tab[0](dst, src + offset1, stride, 8, x8, y8); |
|
587 | 587 |
} else { |
588 | 588 |
vp6_filter_diag2(s, dst, src+offset1 + ((mv.x^mv.y)>>31), stride, x8, y8); |
589 | 589 |
} |
... | ... |
@@ -43,7 +43,8 @@ YASM-OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp.o |
43 | 43 |
YASM-OBJS-$(CONFIG_DCT) += x86/dct32.o |
44 | 44 |
YASM-OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc.o |
45 | 45 |
YASM-OBJS-$(CONFIG_FFT) += x86/fft.o |
46 |
-YASM-OBJS-$(CONFIG_H264CHROMA) += x86/h264_chromamc.o \ |
|
46 |
+YASM-OBJS-$(CONFIG_H264CHROMA) += x86/h264chroma_init.o \ |
|
47 |
+ x86/h264_chromamc.o \ |
|
47 | 48 |
x86/h264_chromamc_10bit.o |
48 | 49 |
YASM-OBJS-$(CONFIG_H264DSP) += x86/h264_deblock.o \ |
49 | 50 |
x86/h264_deblock_10bit.o \ |
... | ... |
@@ -1460,49 +1460,6 @@ void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, |
1460 | 1460 |
void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, |
1461 | 1461 |
ptrdiff_t line_size, int h); |
1462 | 1462 |
|
1463 |
-void ff_put_h264_chroma_mc8_rnd_mmx (uint8_t *dst, uint8_t *src, |
|
1464 |
- int stride, int h, int x, int y); |
|
1465 |
-void ff_avg_h264_chroma_mc8_rnd_mmxext(uint8_t *dst, uint8_t *src, |
|
1466 |
- int stride, int h, int x, int y); |
|
1467 |
-void ff_avg_h264_chroma_mc8_rnd_3dnow(uint8_t *dst, uint8_t *src, |
|
1468 |
- int stride, int h, int x, int y); |
|
1469 |
- |
|
1470 |
-void ff_put_h264_chroma_mc4_mmx (uint8_t *dst, uint8_t *src, |
|
1471 |
- int stride, int h, int x, int y); |
|
1472 |
-void ff_avg_h264_chroma_mc4_mmxext (uint8_t *dst, uint8_t *src, |
|
1473 |
- int stride, int h, int x, int y); |
|
1474 |
-void ff_avg_h264_chroma_mc4_3dnow (uint8_t *dst, uint8_t *src, |
|
1475 |
- int stride, int h, int x, int y); |
|
1476 |
- |
|
1477 |
-void ff_put_h264_chroma_mc2_mmxext (uint8_t *dst, uint8_t *src, |
|
1478 |
- int stride, int h, int x, int y); |
|
1479 |
-void ff_avg_h264_chroma_mc2_mmxext (uint8_t *dst, uint8_t *src, |
|
1480 |
- int stride, int h, int x, int y); |
|
1481 |
- |
|
1482 |
-void ff_put_h264_chroma_mc8_rnd_ssse3(uint8_t *dst, uint8_t *src, |
|
1483 |
- int stride, int h, int x, int y); |
|
1484 |
-void ff_put_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src, |
|
1485 |
- int stride, int h, int x, int y); |
|
1486 |
- |
|
1487 |
-void ff_avg_h264_chroma_mc8_rnd_ssse3(uint8_t *dst, uint8_t *src, |
|
1488 |
- int stride, int h, int x, int y); |
|
1489 |
-void ff_avg_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src, |
|
1490 |
- int stride, int h, int x, int y); |
|
1491 |
- |
|
1492 |
-#define CHROMA_MC(OP, NUM, DEPTH, OPT) \ |
|
1493 |
-void ff_ ## OP ## _h264_chroma_mc ## NUM ## _ ## DEPTH ## _ ## OPT \ |
|
1494 |
- (uint8_t *dst, uint8_t *src, \ |
|
1495 |
- int stride, int h, int x, int y); |
|
1496 |
- |
|
1497 |
-CHROMA_MC(put, 2, 10, mmxext) |
|
1498 |
-CHROMA_MC(avg, 2, 10, mmxext) |
|
1499 |
-CHROMA_MC(put, 4, 10, mmxext) |
|
1500 |
-CHROMA_MC(avg, 4, 10, mmxext) |
|
1501 |
-CHROMA_MC(put, 8, 10, sse2) |
|
1502 |
-CHROMA_MC(avg, 8, 10, sse2) |
|
1503 |
-CHROMA_MC(put, 8, 10, avx) |
|
1504 |
-CHROMA_MC(avg, 8, 10, avx) |
|
1505 |
- |
|
1506 | 1463 |
#if HAVE_INLINE_ASM |
1507 | 1464 |
|
1508 | 1465 |
/* CAVS-specific */ |
... | ... |
@@ -1704,11 +1661,6 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, |
1704 | 1704 |
#endif /* HAVE_INLINE_ASM */ |
1705 | 1705 |
|
1706 | 1706 |
#if HAVE_YASM |
1707 |
- if (!high_bit_depth && CONFIG_H264CHROMA) { |
|
1708 |
- c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_mmx; |
|
1709 |
- c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_mmx; |
|
1710 |
- } |
|
1711 |
- |
|
1712 | 1707 |
c->vector_clip_int32 = ff_vector_clip_int32_mmx; |
1713 | 1708 |
#endif |
1714 | 1709 |
|
... | ... |
@@ -1773,19 +1725,6 @@ static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx, |
1773 | 1773 |
c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext; |
1774 | 1774 |
} |
1775 | 1775 |
|
1776 |
- if (!high_bit_depth && CONFIG_H264CHROMA) { |
|
1777 |
- c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_mmxext; |
|
1778 |
- c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_mmxext; |
|
1779 |
- c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_mmxext; |
|
1780 |
- c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_mmxext; |
|
1781 |
- } |
|
1782 |
- if (bit_depth == 10 && CONFIG_H264CHROMA) { |
|
1783 |
- c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_10_mmxext; |
|
1784 |
- c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_10_mmxext; |
|
1785 |
- c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_10_mmxext; |
|
1786 |
- c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_10_mmxext; |
|
1787 |
- } |
|
1788 |
- |
|
1789 | 1776 |
/* slower than cmov version on AMD */ |
1790 | 1777 |
if (!(mm_flags & AV_CPU_FLAG_3DNOW)) |
1791 | 1778 |
c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmxext; |
... | ... |
@@ -1838,11 +1777,6 @@ static av_cold void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx, |
1838 | 1838 |
c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_3dnow; |
1839 | 1839 |
c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_3dnow; |
1840 | 1840 |
} |
1841 |
- |
|
1842 |
- if (!high_bit_depth && CONFIG_H264CHROMA) { |
|
1843 |
- c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_3dnow; |
|
1844 |
- c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_3dnow; |
|
1845 |
- } |
|
1846 | 1841 |
#endif /* HAVE_YASM */ |
1847 | 1842 |
} |
1848 | 1843 |
|
... | ... |
@@ -1889,13 +1823,6 @@ static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx, |
1889 | 1889 |
} |
1890 | 1890 |
} |
1891 | 1891 |
|
1892 |
- if (bit_depth == 10) { |
|
1893 |
- if (CONFIG_H264CHROMA) { |
|
1894 |
- c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_sse2; |
|
1895 |
- c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_sse2; |
|
1896 |
- } |
|
1897 |
- } |
|
1898 |
- |
|
1899 | 1892 |
c->scalarproduct_int16 = ff_scalarproduct_int16_sse2; |
1900 | 1893 |
c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2; |
1901 | 1894 |
if (mm_flags & AV_CPU_FLAG_ATOM) { |
... | ... |
@@ -1916,14 +1843,6 @@ static av_cold void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx, |
1916 | 1916 |
int mm_flags) |
1917 | 1917 |
{ |
1918 | 1918 |
#if HAVE_SSSE3_EXTERNAL |
1919 |
- const int high_bit_depth = avctx->bits_per_raw_sample > 8; |
|
1920 |
- |
|
1921 |
- if (!high_bit_depth && CONFIG_H264CHROMA) { |
|
1922 |
- c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_ssse3; |
|
1923 |
- c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_ssse3; |
|
1924 |
- c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_ssse3; |
|
1925 |
- c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_ssse3; |
|
1926 |
- } |
|
1927 | 1919 |
c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3; |
1928 | 1920 |
if (mm_flags & AV_CPU_FLAG_SSE4) // not really sse4, just slow on Conroe |
1929 | 1921 |
c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_sse4; |
... | ... |
@@ -1946,20 +1865,6 @@ static av_cold void dsputil_init_sse4(DSPContext *c, AVCodecContext *avctx, |
1946 | 1946 |
#endif /* HAVE_SSE4_EXTERNAL */ |
1947 | 1947 |
} |
1948 | 1948 |
|
1949 |
-static av_cold void dsputil_init_avx(DSPContext *c, AVCodecContext *avctx, int mm_flags) |
|
1950 |
-{ |
|
1951 |
-#if HAVE_AVX_EXTERNAL |
|
1952 |
- const int bit_depth = avctx->bits_per_raw_sample; |
|
1953 |
- |
|
1954 |
- if (bit_depth == 10) { |
|
1955 |
- if (CONFIG_H264CHROMA) { |
|
1956 |
- c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_avx; |
|
1957 |
- c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_avx; |
|
1958 |
- } |
|
1959 |
- } |
|
1960 |
-#endif /* HAVE_AVX_EXTERNAL */ |
|
1961 |
-} |
|
1962 |
- |
|
1963 | 1949 |
av_cold void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx) |
1964 | 1950 |
{ |
1965 | 1951 |
int mm_flags = av_get_cpu_flags(); |
... | ... |
@@ -1990,9 +1895,6 @@ av_cold void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx) |
1990 | 1990 |
if (mm_flags & AV_CPU_FLAG_SSE4) |
1991 | 1991 |
dsputil_init_sse4(c, avctx, mm_flags); |
1992 | 1992 |
|
1993 |
- if (mm_flags & AV_CPU_FLAG_AVX) |
|
1994 |
- dsputil_init_avx(c, avctx, mm_flags); |
|
1995 |
- |
|
1996 | 1993 |
if (CONFIG_ENCODERS) |
1997 | 1994 |
ff_dsputilenc_init_mmx(c, avctx); |
1998 | 1995 |
} |
1999 | 1996 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,116 @@ |
0 |
+/* |
|
1 |
+ * This file is part of Libav. |
|
2 |
+ * |
|
3 |
+ * Libav is free software; you can redistribute it and/or |
|
4 |
+ * modify it under the terms of the GNU Lesser General Public |
|
5 |
+ * License as published by the Free Software Foundation; either |
|
6 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
7 |
+ * |
|
8 |
+ * Libav is distributed in the hope that it will be useful, |
|
9 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
10 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
11 |
+ * Lesser General Public License for more details. |
|
12 |
+ * |
|
13 |
+ * You should have received a copy of the GNU Lesser General Public |
|
14 |
+ * License along with Libav; if not, write to the Free Software |
|
15 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
16 |
+ */ |
|
17 |
+ |
|
18 |
+#include <stdint.h> |
|
19 |
+ |
|
20 |
+#include "config.h" |
|
21 |
+#include "libavutil/cpu.h" |
|
22 |
+#include "libavutil/x86/cpu.h" |
|
23 |
+#include "libavcodec/h264chroma.h" |
|
24 |
+ |
|
25 |
+void ff_put_h264_chroma_mc8_rnd_mmx (uint8_t *dst, uint8_t *src, |
|
26 |
+ int stride, int h, int x, int y); |
|
27 |
+void ff_avg_h264_chroma_mc8_rnd_mmxext(uint8_t *dst, uint8_t *src, |
|
28 |
+ int stride, int h, int x, int y); |
|
29 |
+void ff_avg_h264_chroma_mc8_rnd_3dnow(uint8_t *dst, uint8_t *src, |
|
30 |
+ int stride, int h, int x, int y); |
|
31 |
+ |
|
32 |
+void ff_put_h264_chroma_mc4_mmx (uint8_t *dst, uint8_t *src, |
|
33 |
+ int stride, int h, int x, int y); |
|
34 |
+void ff_avg_h264_chroma_mc4_mmxext (uint8_t *dst, uint8_t *src, |
|
35 |
+ int stride, int h, int x, int y); |
|
36 |
+void ff_avg_h264_chroma_mc4_3dnow (uint8_t *dst, uint8_t *src, |
|
37 |
+ int stride, int h, int x, int y); |
|
38 |
+ |
|
39 |
+void ff_put_h264_chroma_mc2_mmxext (uint8_t *dst, uint8_t *src, |
|
40 |
+ int stride, int h, int x, int y); |
|
41 |
+void ff_avg_h264_chroma_mc2_mmxext (uint8_t *dst, uint8_t *src, |
|
42 |
+ int stride, int h, int x, int y); |
|
43 |
+ |
|
44 |
+void ff_put_h264_chroma_mc8_rnd_ssse3(uint8_t *dst, uint8_t *src, |
|
45 |
+ int stride, int h, int x, int y); |
|
46 |
+void ff_put_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src, |
|
47 |
+ int stride, int h, int x, int y); |
|
48 |
+ |
|
49 |
+void ff_avg_h264_chroma_mc8_rnd_ssse3(uint8_t *dst, uint8_t *src, |
|
50 |
+ int stride, int h, int x, int y); |
|
51 |
+void ff_avg_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src, |
|
52 |
+ int stride, int h, int x, int y); |
|
53 |
+ |
|
54 |
/*
 * Prototype generator for the bit-depth-specific chroma MC assembly
 * functions, which follow the
 * ff_<op>_h264_chroma_mc<size>_<depth>_<opt> naming scheme.
 * OP is put/avg, NUM the block width, DEPTH the bit depth, OPT the
 * instruction-set suffix of the implementation.
 */
#define CHROMA_MC(OP, NUM, DEPTH, OPT) \
void ff_ ## OP ## _h264_chroma_mc ## NUM ## _ ## DEPTH ## _ ## OPT \
                                      (uint8_t *dst, uint8_t *src, \
                                       int stride, int h, int x, int y);

/* 10-bit variants: 2- and 4-wide via MMXEXT, 8-wide via SSE2 and AVX. */
CHROMA_MC(put, 2, 10, mmxext)
CHROMA_MC(avg, 2, 10, mmxext)
CHROMA_MC(put, 4, 10, mmxext)
CHROMA_MC(avg, 4, 10, mmxext)
CHROMA_MC(put, 8, 10, sse2)
CHROMA_MC(avg, 8, 10, sse2)
CHROMA_MC(put, 8, 10, avx)
CHROMA_MC(avg, 8, 10, avx)
|
68 |
/**
 * Install x86 SIMD implementations of the H.264 chroma MC functions.
 *
 * Probes the CPU at runtime and fills the put/avg chroma
 * function-pointer tables in @p c with the fastest available version
 * for the given bit depth. Judging by the function names, table slot
 * [0] holds the 8-wide, [1] the 4-wide and [2] the 2-wide variant.
 *
 * NOTE(review): the order of the checks is significant — later blocks
 * deliberately overwrite pointers set by earlier ones (e.g. the SSSE3
 * block replaces the MMX/MMXEXT 8- and 4-wide pointers). Do not
 * reorder.
 *
 * @param c         context whose chroma MC function pointers are set
 * @param bit_depth bits per sample; 8-bit and 10-bit paths are
 *                  handled here, other depths keep the C fallbacks
 */
void ff_h264chroma_init_x86(H264ChromaContext *c, int bit_depth)
{
    int high_bit_depth = bit_depth > 8;
    int mm_flags = av_get_cpu_flags();

    /* 8-bit baseline: MMX provides the put variants only. */
    if (EXTERNAL_MMX(mm_flags) && !high_bit_depth) {
        c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_mmx;
        c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_mmx;
    }

    /* 8-bit avg for CPUs with 3DNow! but no MMXEXT. */
    if (EXTERNAL_AMD3DNOW(mm_flags) && !high_bit_depth) {
        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_3dnow;
        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_3dnow;
    }

    /* 8-bit MMXEXT: avg variants (overriding 3DNow!) plus the 2-wide pair. */
    if (EXTERNAL_MMXEXT(mm_flags) && !high_bit_depth) {
        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_mmxext;
        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_mmxext;
        c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_mmxext;
        c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_mmxext;
    }

    /* 10-bit MMXEXT: 2- and 4-wide variants. */
    if (EXTERNAL_MMXEXT(mm_flags) && bit_depth == 10) {
        c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_10_mmxext;
        c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_10_mmxext;
        c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_10_mmxext;
        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_10_mmxext;
    }

    /* 10-bit SSE2: 8-wide variants. */
    if (EXTERNAL_SSE2(mm_flags) && bit_depth == 10) {
        c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_sse2;
        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_sse2;
    }

    /* 8-bit SSSE3: overrides the MMX/MMXEXT 8- and 4-wide pointers. */
    if (EXTERNAL_SSSE3(mm_flags) && !high_bit_depth) {
        c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_ssse3;
        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_ssse3;
        c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_ssse3;
        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_ssse3;
    }

    /* 10-bit AVX: 8-wide variants, overriding SSE2. */
    if (EXTERNAL_AVX(mm_flags) && bit_depth == 10) {
        // AVX implies !cache64.
        // TODO: Port cache(32|64) detection from x264.
        c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_avx;
        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_avx;
    }
}