... | ... |
@@ -1558,6 +1558,7 @@ CONFIG_EXTRA=" |
1558 | 1558 |
mpegvideo |
1559 | 1559 |
mpegvideoenc |
1560 | 1560 |
nettle |
1561 |
+ pixblockdsp |
|
1561 | 1562 |
qpeldsp |
1562 | 1563 |
rangecoder |
1563 | 1564 |
riffdec |
... | ... |
@@ -1706,7 +1707,7 @@ threads_if_any="$THREADS_LIST" |
1706 | 1706 |
|
1707 | 1707 |
# subsystems |
1708 | 1708 |
dct_select="rdft" |
1709 |
-dsputil_select="fdctdsp idctdsp" |
|
1709 |
+dsputil_select="fdctdsp idctdsp pixblockdsp" |
|
1710 | 1710 |
error_resilience_select="dsputil" |
1711 | 1711 |
intrax8_select="error_resilience" |
1712 | 1712 |
mdct_select="fft" |
... | ... |
@@ -1715,7 +1716,7 @@ mpeg_er_select="error_resilience" |
1715 | 1715 |
mpegaudio_select="mpegaudiodsp" |
1716 | 1716 |
mpegaudiodsp_select="dct" |
1717 | 1717 |
mpegvideo_select="blockdsp dsputil hpeldsp idctdsp videodsp" |
1718 |
-mpegvideoenc_select="dsputil mpegvideo qpeldsp" |
|
1718 |
+mpegvideoenc_select="dsputil mpegvideo pixblockdsp qpeldsp" |
|
1719 | 1719 |
|
1720 | 1720 |
# decoders / encoders |
1721 | 1721 |
aac_decoder_select="mdct sinewin" |
... | ... |
@@ -1732,9 +1733,9 @@ amrwb_decoder_select="lsp" |
1732 | 1732 |
amv_decoder_select="sp5x_decoder" |
1733 | 1733 |
ape_decoder_select="bswapdsp" |
1734 | 1734 |
asv1_decoder_select="blockdsp bswapdsp idctdsp" |
1735 |
-asv1_encoder_select="bswapdsp dsputil fdctdsp" |
|
1735 |
+asv1_encoder_select="bswapdsp fdctdsp pixblockdsp" |
|
1736 | 1736 |
asv2_decoder_select="blockdsp bswapdsp idctdsp" |
1737 |
-asv2_encoder_select="bswapdsp dsputil fdctdsp" |
|
1737 |
+asv2_encoder_select="bswapdsp fdctdsp pixblockdsp" |
|
1738 | 1738 |
atrac1_decoder_select="mdct sinewin" |
1739 | 1739 |
atrac3_decoder_select="mdct" |
1740 | 1740 |
atrac3p_decoder_select="mdct sinewin" |
... | ... |
@@ -1749,9 +1750,9 @@ cscd_decoder_select="lzo" |
1749 | 1749 |
cscd_decoder_suggest="zlib" |
1750 | 1750 |
dca_decoder_select="mdct" |
1751 | 1751 |
dnxhd_decoder_select="blockdsp idctdsp" |
1752 |
-dnxhd_encoder_select="aandcttables blockdsp dsputil fdctdsp idctdsp mpegvideoenc" |
|
1752 |
+dnxhd_encoder_select="aandcttables blockdsp fdctdsp idctdsp mpegvideoenc pixblockdsp" |
|
1753 | 1753 |
dvvideo_decoder_select="dvprofile idctdsp" |
1754 |
-dvvideo_encoder_select="dsputil dvprofile fdctdsp" |
|
1754 |
+dvvideo_encoder_select="dsputil dvprofile fdctdsp pixblockdsp" |
|
1755 | 1755 |
dxa_decoder_deps="zlib" |
1756 | 1756 |
eac3_decoder_select="ac3_decoder" |
1757 | 1757 |
eac3_encoder_select="ac3_encoder" |
... | ... |
@@ -72,6 +72,7 @@ OBJS-$(CONFIG_MPEGVIDEO) += mpegvideo.o mpegvideodsp.o \ |
72 | 72 |
OBJS-$(CONFIG_MPEGVIDEOENC) += mpegvideo_enc.o mpeg12data.o \ |
73 | 73 |
motion_est.o ratecontrol.o \ |
74 | 74 |
mpegvideoencdsp.o |
75 |
+OBJS-$(CONFIG_PIXBLOCKDSP) += pixblockdsp.o |
|
75 | 76 |
OBJS-$(CONFIG_QPELDSP) += qpeldsp.o |
76 | 77 |
OBJS-$(CONFIG_RANGECODER) += rangecoder.o |
77 | 78 |
RDFT-OBJS-$(CONFIG_HARDCODED_TABLES) += sin_tables.o |
... | ... |
@@ -23,6 +23,7 @@ OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_init_arm.o |
23 | 23 |
OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_arm.o |
24 | 24 |
OBJS-$(CONFIG_MPEGVIDEOENC) += arm/mpegvideoencdsp_init_arm.o |
25 | 25 |
OBJS-$(CONFIG_NEON_CLOBBER_TEST) += arm/neontest.o |
26 |
+OBJS-$(CONFIG_PIXBLOCKDSP) += arm/pixblockdsp_init_arm.o |
|
26 | 27 |
OBJS-$(CONFIG_VIDEODSP) += arm/videodsp_init_arm.o |
27 | 28 |
OBJS-$(CONFIG_VP3DSP) += arm/vp3dsp_init_arm.o |
28 | 29 |
|
... | ... |
@@ -62,6 +63,7 @@ ARMV6-OBJS-$(CONFIG_IDCTDSP) += arm/idctdsp_init_armv6.o \ |
62 | 62 |
arm/simple_idct_armv6.o |
63 | 63 |
ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o |
64 | 64 |
ARMV6-OBJS-$(CONFIG_MPEGVIDEOENC) += arm/mpegvideoencdsp_armv6.o |
65 |
+ARMV6-OBJS-$(CONFIG_PIXBLOCKDSP) += arm/pixblockdsp_armv6.o |
|
65 | 66 |
|
66 | 67 |
ARMV6-OBJS-$(CONFIG_MLP_DECODER) += arm/mlpdsp_armv6.o |
67 | 68 |
ARMV6-OBJS-$(CONFIG_VP7_DECODER) += arm/vp8_armv6.o \ |
... | ... |
@@ -20,61 +20,6 @@ |
20 | 20 |
|
21 | 21 |
#include "libavutil/arm/asm.S" |
22 | 22 |
|
23 |
-function ff_get_pixels_armv6, export=1 |
|
24 |
- pld [r1, r2] |
|
25 |
- push {r4-r8, lr} |
|
26 |
- mov lr, #8 |
|
27 |
-1: |
|
28 |
- ldrd_post r4, r5, r1, r2 |
|
29 |
- subs lr, lr, #1 |
|
30 |
- uxtb16 r6, r4 |
|
31 |
- uxtb16 r4, r4, ror #8 |
|
32 |
- uxtb16 r12, r5 |
|
33 |
- uxtb16 r8, r5, ror #8 |
|
34 |
- pld [r1, r2] |
|
35 |
- pkhbt r5, r6, r4, lsl #16 |
|
36 |
- pkhtb r6, r4, r6, asr #16 |
|
37 |
- pkhbt r7, r12, r8, lsl #16 |
|
38 |
- pkhtb r12, r8, r12, asr #16 |
|
39 |
- stm r0!, {r5,r6,r7,r12} |
|
40 |
- bgt 1b |
|
41 |
- |
|
42 |
- pop {r4-r8, pc} |
|
43 |
-endfunc |
|
44 |
- |
|
45 |
-function ff_diff_pixels_armv6, export=1 |
|
46 |
- pld [r1, r3] |
|
47 |
- pld [r2, r3] |
|
48 |
- push {r4-r9, lr} |
|
49 |
- mov lr, #8 |
|
50 |
-1: |
|
51 |
- ldrd_post r4, r5, r1, r3 |
|
52 |
- ldrd_post r6, r7, r2, r3 |
|
53 |
- uxtb16 r8, r4 |
|
54 |
- uxtb16 r4, r4, ror #8 |
|
55 |
- uxtb16 r9, r6 |
|
56 |
- uxtb16 r6, r6, ror #8 |
|
57 |
- pld [r1, r3] |
|
58 |
- ssub16 r9, r8, r9 |
|
59 |
- ssub16 r6, r4, r6 |
|
60 |
- uxtb16 r8, r5 |
|
61 |
- uxtb16 r5, r5, ror #8 |
|
62 |
- pld [r2, r3] |
|
63 |
- pkhbt r4, r9, r6, lsl #16 |
|
64 |
- pkhtb r6, r6, r9, asr #16 |
|
65 |
- uxtb16 r9, r7 |
|
66 |
- uxtb16 r7, r7, ror #8 |
|
67 |
- ssub16 r9, r8, r9 |
|
68 |
- ssub16 r5, r5, r7 |
|
69 |
- subs lr, lr, #1 |
|
70 |
- pkhbt r8, r9, r5, lsl #16 |
|
71 |
- pkhtb r9, r5, r9, asr #16 |
|
72 |
- stm r0!, {r4,r6,r8,r9} |
|
73 |
- bgt 1b |
|
74 |
- |
|
75 |
- pop {r4-r9, pc} |
|
76 |
-endfunc |
|
77 |
- |
|
78 | 23 |
function ff_pix_abs16_armv6, export=1 |
79 | 24 |
ldr r0, [sp] |
80 | 25 |
push {r4-r9, lr} |
... | ... |
@@ -26,10 +26,6 @@ |
26 | 26 |
#include "libavcodec/mpegvideo.h" |
27 | 27 |
#include "dsputil_arm.h" |
28 | 28 |
|
29 |
-void ff_get_pixels_armv6(int16_t *block, const uint8_t *pixels, int stride); |
|
30 |
-void ff_diff_pixels_armv6(int16_t *block, const uint8_t *s1, |
|
31 |
- const uint8_t *s2, int stride); |
|
32 |
- |
|
33 | 29 |
int ff_pix_abs16_armv6(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2, |
34 | 30 |
int line_size, int h); |
35 | 31 |
int ff_pix_abs16_x2_armv6(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2, |
... | ... |
@@ -46,10 +42,6 @@ int ff_sse16_armv6(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2, |
46 | 46 |
av_cold void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx, |
47 | 47 |
unsigned high_bit_depth) |
48 | 48 |
{ |
49 |
- if (!high_bit_depth) |
|
50 |
- c->get_pixels = ff_get_pixels_armv6; |
|
51 |
- c->diff_pixels = ff_diff_pixels_armv6; |
|
52 |
- |
|
53 | 49 |
c->pix_abs[0][0] = ff_pix_abs16_armv6; |
54 | 50 |
c->pix_abs[0][1] = ff_pix_abs16_x2_armv6; |
55 | 51 |
c->pix_abs[0][2] = ff_pix_abs16_y2_armv6; |
56 | 52 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,76 @@ |
0 |
+/* |
|
1 |
+ * Copyright (c) 2009 Mans Rullgard <mans@mansr.com> |
|
2 |
+ * |
|
3 |
+ * This file is part of Libav. |
|
4 |
+ * |
|
5 |
+ * Libav is free software; you can redistribute it and/or |
|
6 |
+ * modify it under the terms of the GNU Lesser General Public |
|
7 |
+ * License as published by the Free Software Foundation; either |
|
8 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
9 |
+ * |
|
10 |
+ * Libav is distributed in the hope that it will be useful, |
|
11 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
12 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
13 |
+ * Lesser General Public License for more details. |
|
14 |
+ * |
|
15 |
+ * You should have received a copy of the GNU Lesser General Public |
|
16 |
+ * License along with Libav; if not, write to the Free Software |
|
17 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
18 |
+ */ |
|
19 |
+ |
|
20 |
+#include "libavutil/arm/asm.S" |
|
21 |
+/* |
+ * ff_get_pixels_armv6: widen an 8x8 block of bytes into 64 halfwords. |
+ * C prototype visible in this change: |
+ *   void ff_get_pixels_armv6(int16_t *block, const uint8_t *pixels, int stride) |
+ * so under the standard ARM calling convention r0 = block, r1 = pixels, |
+ * r2 = stride. Each of the 8 loop iterations consumes two 32-bit words from |
+ * the source and stores four words (8 halfwords) to the destination. |
+ * The mapping of bytes to pixel order assumes little-endian loads - TODO confirm. |
+ */ |
|
22 |
+function ff_get_pixels_armv6, export=1 |
|
23 |
+ pld [r1, r2] /* preload hint for address r1 + r2 */ |
|
24 |
+ push {r4-r8, lr} /* lr is saved because it is reused as the loop counter */ |
|
25 |
+ mov lr, #8 /* 8 iterations, one per row */ |
|
26 |
+1: |
|
27 |
+ ldrd_post r4, r5, r1, r2 /* macro from asm.S; presumably loads r4/r5 from [r1] then adds r2 to r1 - confirm */ |
|
28 |
+ subs lr, lr, #1 /* sets the flags tested by bgt below; nothing in between alters them */ |
|
29 |
+ uxtb16 r6, r4 /* r6 = bytes 0 and 2 of r4, zero-extended to halfwords */ |
|
30 |
+ uxtb16 r4, r4, ror #8 /* r4 = bytes 1 and 3 of r4, zero-extended to halfwords */ |
|
31 |
+ uxtb16 r12, r5 /* r12 = bytes 0 and 2 of r5 */ |
|
32 |
+ uxtb16 r8, r5, ror #8 /* r8 = bytes 1 and 3 of r5 */ |
|
33 |
+ pld [r1, r2] /* preload hint for the row after the one r1 now points at */ |
|
34 |
+ pkhbt r5, r6, r4, lsl #16 /* r5 = { byte 0, byte 1 } of the first word */ |
|
35 |
+ pkhtb r6, r4, r6, asr #16 /* r6 = { byte 2, byte 3 } of the first word */ |
|
36 |
+ pkhbt r7, r12, r8, lsl #16 /* r7 = { byte 0, byte 1 } of the second word */ |
|
37 |
+ pkhtb r12, r8, r12, asr #16 /* r12 = { byte 2, byte 3 } of the second word */ |
|
38 |
+ stm r0!, {r5,r6,r7,r12} /* store 8 halfwords and advance r0 by 16 bytes */ |
|
39 |
+ bgt 1b /* loop while the counter is still positive */ |
|
40 |
+ |
|
41 |
+ pop {r4-r8, pc} /* restore registers and return by popping the saved lr into pc */ |
|
42 |
+endfunc |
|
43 |
+/* |
+ * ff_diff_pixels_armv6: byte-wise difference of two 8x8 blocks, stored as |
+ * 64 halfwords. C prototype visible in this change: |
+ *   void ff_diff_pixels_armv6(int16_t *block, const uint8_t *s1, |
+ *                             const uint8_t *s2, int stride) |
+ * so under the standard ARM calling convention r0 = block, r1 = s1, r2 = s2, |
+ * r3 = stride (shared by both sources). Each of the 8 loop iterations reads |
+ * two words from each source and stores four words (8 halfword differences, |
+ * s1 minus s2). |
+ * The mapping of bytes to pixel order assumes little-endian loads - TODO confirm. |
+ */ |
|
44 |
+function ff_diff_pixels_armv6, export=1 |
|
45 |
+ pld [r1, r3] /* preload hint for address r1 + r3 */ |
|
46 |
+ pld [r2, r3] /* preload hint for address r2 + r3 */ |
|
47 |
+ push {r4-r9, lr} /* lr is saved because it is reused as the loop counter */ |
|
48 |
+ mov lr, #8 /* 8 iterations, one per row */ |
|
49 |
+1: |
|
50 |
+ ldrd_post r4, r5, r1, r3 /* macro from asm.S; presumably loads r4/r5 from [r1] then adds r3 to r1 - confirm */ |
|
51 |
+ ldrd_post r6, r7, r2, r3 /* same for the second source: r6/r7 from [r2], then r2 advances by r3 - confirm */ |
|
52 |
+ uxtb16 r8, r4 /* r8 = bytes 0 and 2 of r4 (first source, first word) */ |
|
53 |
+ uxtb16 r4, r4, ror #8 /* r4 = bytes 1 and 3 of r4 */ |
|
54 |
+ uxtb16 r9, r6 /* r9 = bytes 0 and 2 of r6 (second source, first word) */ |
|
55 |
+ uxtb16 r6, r6, ror #8 /* r6 = bytes 1 and 3 of r6 */ |
|
56 |
+ pld [r1, r3] /* preload hint for the row after the one r1 now points at */ |
|
57 |
+ ssub16 r9, r8, r9 /* r9 = differences for bytes 0 and 2 (halfword-wise r8 - r9) */ |
|
58 |
+ ssub16 r6, r4, r6 /* r6 = differences for bytes 1 and 3 */ |
|
59 |
+ uxtb16 r8, r5 /* r8 = bytes 0 and 2 of r5 (first source, second word) */ |
|
60 |
+ uxtb16 r5, r5, ror #8 /* r5 = bytes 1 and 3 of r5 */ |
|
61 |
+ pld [r2, r3] /* preload hint for the row after the one r2 now points at */ |
|
62 |
+ pkhbt r4, r9, r6, lsl #16 /* r4 = { diff 0, diff 1 } */ |
|
63 |
+ pkhtb r6, r6, r9, asr #16 /* r6 = { diff 2, diff 3 } */ |
|
64 |
+ uxtb16 r9, r7 /* r9 = bytes 0 and 2 of r7 (second source, second word) */ |
|
65 |
+ uxtb16 r7, r7, ror #8 /* r7 = bytes 1 and 3 of r7 */ |
|
66 |
+ ssub16 r9, r8, r9 /* r9 = differences for bytes 4 and 6 of the row */ |
|
67 |
+ ssub16 r5, r5, r7 /* r5 = differences for bytes 5 and 7 of the row */ |
|
68 |
+ subs lr, lr, #1 /* sets the flags tested by bgt below; nothing in between alters them */ |
|
69 |
+ pkhbt r8, r9, r5, lsl #16 /* r8 = { diff 4, diff 5 } */ |
|
70 |
+ pkhtb r9, r5, r9, asr #16 /* r9 = { diff 6, diff 7 } */ |
|
71 |
+ stm r0!, {r4,r6,r8,r9} /* store 8 halfword differences and advance r0 by 16 bytes */ |
|
72 |
+ bgt 1b /* loop while the counter is still positive */ |
|
73 |
+ |
|
74 |
+ pop {r4-r9, pc} /* restore registers and return by popping the saved lr into pc */ |
|
75 |
+endfunc |
0 | 76 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,42 @@ |
0 |
+/* |
|
1 |
+ * This file is part of Libav. |
|
2 |
+ * |
|
3 |
+ * Libav is free software; you can redistribute it and/or |
|
4 |
+ * modify it under the terms of the GNU Lesser General Public |
|
5 |
+ * License as published by the Free Software Foundation; either |
|
6 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
7 |
+ * |
|
8 |
+ * Libav is distributed in the hope that it will be useful, |
|
9 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
10 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
11 |
+ * Lesser General Public License for more details. |
|
12 |
+ * |
|
13 |
+ * You should have received a copy of the GNU Lesser General Public |
|
14 |
+ * License along with Libav; if not, write to the Free Software |
|
15 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
16 |
+ */ |
|
17 |
+ |
|
18 |
+#include <stdint.h> |
|
19 |
+ |
|
20 |
+#include "libavutil/attributes.h" |
|
21 |
+#include "libavutil/cpu.h" |
|
22 |
+#include "libavutil/arm/cpu.h" |
|
23 |
+#include "libavcodec/avcodec.h" |
|
24 |
+#include "libavcodec/pixblockdsp.h" |
|
25 |
+/* Prototypes for the ARMv6 routines installed below; their bodies are |
+ * written in assembly, not in this translation unit. */ |
|
26 |
+void ff_get_pixels_armv6(int16_t *block, const uint8_t *pixels, int stride); |
|
27 |
+void ff_diff_pixels_armv6(int16_t *block, const uint8_t *s1, |
|
28 |
+ const uint8_t *s2, int stride); |
|
29 |
+/** |
+ * Install ARM-specific implementations into a PixblockDSPContext. |
+ * |
+ * @param c              context whose get_pixels / diff_pixels pointers may |
+ *                       be overwritten |
+ * @param avctx          codec context; not read by this function |
+ * @param high_bit_depth nonzero disables the ARMv6 get_pixels override |
+ * |
+ * Nothing is changed unless have_armv6() accepts the runtime CPU flags. |
+ */ |
|
30 |
+av_cold void ff_pixblockdsp_init_arm(PixblockDSPContext *c, |
|
31 |
+ AVCodecContext *avctx, |
|
32 |
+ unsigned high_bit_depth) |
|
33 |
+{ |
|
34 |
+ int cpu_flags = av_get_cpu_flags(); |
|
35 |
+ |
|
36 |
+ if (have_armv6(cpu_flags)) { |
|
37 |
+ if (!high_bit_depth) /* brace-less: guards ONLY the get_pixels assignment on the next line */ |
|
38 |
+ c->get_pixels = ff_get_pixels_armv6; |
|
39 |
+ c->diff_pixels = ff_diff_pixels_armv6; /* set whenever ARMv6 is available, regardless of high_bit_depth */ |
|
40 |
+ } |
|
41 |
+} |
... | ... |
@@ -33,19 +33,19 @@ |
33 | 33 |
#include "avcodec.h" |
34 | 34 |
#include "blockdsp.h" |
35 | 35 |
#include "bswapdsp.h" |
36 |
-#include "dsputil.h" |
|
37 | 36 |
#include "fdctdsp.h" |
38 | 37 |
#include "idctdsp.h" |
39 | 38 |
#include "get_bits.h" |
39 |
+#include "pixblockdsp.h" |
|
40 | 40 |
#include "put_bits.h" |
41 | 41 |
|
42 | 42 |
typedef struct ASV1Context{ |
43 | 43 |
AVCodecContext *avctx; |
44 | 44 |
BlockDSPContext bdsp; |
45 | 45 |
BswapDSPContext bbdsp; |
46 |
- DSPContext dsp; |
|
47 | 46 |
FDCTDSPContext fdsp; |
48 | 47 |
IDCTDSPContext idsp; |
48 |
+ PixblockDSPContext pdsp; |
|
49 | 49 |
PutBitContext pb; |
50 | 50 |
GetBitContext gb; |
51 | 51 |
ScanTable scantable; |
... | ... |
@@ -159,16 +159,16 @@ static inline void dct_get(ASV1Context *a, const AVFrame *frame, |
159 | 159 |
uint8_t *ptr_cb = frame->data[1] + (mb_y * 8 * frame->linesize[1]) + mb_x * 8; |
160 | 160 |
uint8_t *ptr_cr = frame->data[2] + (mb_y * 8 * frame->linesize[2]) + mb_x * 8; |
161 | 161 |
|
162 |
- a->dsp.get_pixels(block[0], ptr_y , linesize); |
|
163 |
- a->dsp.get_pixels(block[1], ptr_y + 8, linesize); |
|
164 |
- a->dsp.get_pixels(block[2], ptr_y + 8*linesize , linesize); |
|
165 |
- a->dsp.get_pixels(block[3], ptr_y + 8*linesize + 8, linesize); |
|
162 |
+ a->pdsp.get_pixels(block[0], ptr_y, linesize); |
|
163 |
+ a->pdsp.get_pixels(block[1], ptr_y + 8, linesize); |
|
164 |
+ a->pdsp.get_pixels(block[2], ptr_y + 8 * linesize, linesize); |
|
165 |
+ a->pdsp.get_pixels(block[3], ptr_y + 8 * linesize + 8, linesize); |
|
166 | 166 |
for(i=0; i<4; i++) |
167 | 167 |
a->fdsp.fdct(block[i]); |
168 | 168 |
|
169 | 169 |
if(!(a->avctx->flags&CODEC_FLAG_GRAY)){ |
170 |
- a->dsp.get_pixels(block[4], ptr_cb, frame->linesize[1]); |
|
171 |
- a->dsp.get_pixels(block[5], ptr_cr, frame->linesize[2]); |
|
170 |
+ a->pdsp.get_pixels(block[4], ptr_cb, frame->linesize[1]); |
|
171 |
+ a->pdsp.get_pixels(block[5], ptr_cr, frame->linesize[2]); |
|
172 | 172 |
for(i=4; i<6; i++) |
173 | 173 |
a->fdsp.fdct(block[i]); |
174 | 174 |
} |
... | ... |
@@ -248,8 +248,8 @@ static av_cold int encode_init(AVCodecContext *avctx){ |
248 | 248 |
avctx->coded_frame->key_frame = 1; |
249 | 249 |
|
250 | 250 |
ff_asv_common_init(avctx); |
251 |
- ff_dsputil_init(&a->dsp, avctx); |
|
252 | 251 |
ff_fdctdsp_init(&a->fdsp, avctx); |
252 |
+ ff_pixblockdsp_init(&a->pdsp, avctx); |
|
253 | 253 |
|
254 | 254 |
if(avctx->global_quality == 0) avctx->global_quality= 4*FF_QUALITY_SCALE; |
255 | 255 |
|
... | ... |
@@ -30,10 +30,10 @@ |
30 | 30 |
|
31 | 31 |
#include "avcodec.h" |
32 | 32 |
#include "blockdsp.h" |
33 |
-#include "dsputil.h" |
|
34 | 33 |
#include "fdctdsp.h" |
35 | 34 |
#include "internal.h" |
36 | 35 |
#include "mpegvideo.h" |
36 |
+#include "pixblockdsp.h" |
|
37 | 37 |
#include "dnxhdenc.h" |
38 | 38 |
|
39 | 39 |
// The largest value that will not lead to overflow for 10bit samples. |
... | ... |
@@ -308,10 +308,10 @@ static av_cold int dnxhd_encode_init(AVCodecContext *avctx) |
308 | 308 |
avctx->bits_per_raw_sample = ctx->cid_table->bit_depth; |
309 | 309 |
|
310 | 310 |
ff_blockdsp_init(&ctx->bdsp, avctx); |
311 |
- ff_dsputil_init(&ctx->m.dsp, avctx); |
|
312 | 311 |
ff_fdctdsp_init(&ctx->m.fdsp, avctx); |
313 | 312 |
ff_idctdsp_init(&ctx->m.idsp, avctx); |
314 | 313 |
ff_mpegvideoencdsp_init(&ctx->m.mpvencdsp, avctx); |
314 |
+ ff_pixblockdsp_init(&ctx->m.pdsp, avctx); |
|
315 | 315 |
ff_dct_common_init(&ctx->m); |
316 | 316 |
if (!ctx->m.dct_quantize) |
317 | 317 |
ctx->m.dct_quantize = ff_dct_quantize_c; |
... | ... |
@@ -540,12 +540,12 @@ void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y) |
540 | 540 |
((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs); |
541 | 541 |
const uint8_t *ptr_v = ctx->thread[0]->src[2] + |
542 | 542 |
((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs); |
543 |
- DSPContext *dsp = &ctx->m.dsp; |
|
543 |
+ PixblockDSPContext *pdsp = &ctx->m.pdsp; |
|
544 | 544 |
|
545 |
- dsp->get_pixels(ctx->blocks[0], ptr_y, ctx->m.linesize); |
|
546 |
- dsp->get_pixels(ctx->blocks[1], ptr_y + bw, ctx->m.linesize); |
|
547 |
- dsp->get_pixels(ctx->blocks[2], ptr_u, ctx->m.uvlinesize); |
|
548 |
- dsp->get_pixels(ctx->blocks[3], ptr_v, ctx->m.uvlinesize); |
|
545 |
+ pdsp->get_pixels(ctx->blocks[0], ptr_y, ctx->m.linesize); |
|
546 |
+ pdsp->get_pixels(ctx->blocks[1], ptr_y + bw, ctx->m.linesize); |
|
547 |
+ pdsp->get_pixels(ctx->blocks[2], ptr_u, ctx->m.uvlinesize); |
|
548 |
+ pdsp->get_pixels(ctx->blocks[3], ptr_v, ctx->m.uvlinesize); |
|
549 | 549 |
|
550 | 550 |
if (mb_y + 1 == ctx->m.mb_height && ctx->m.avctx->height == 1080) { |
551 | 551 |
if (ctx->interlaced) { |
... | ... |
@@ -568,14 +568,14 @@ void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y) |
568 | 568 |
ctx->bdsp.clear_block(ctx->blocks[7]); |
569 | 569 |
} |
570 | 570 |
} else { |
571 |
- dsp->get_pixels(ctx->blocks[4], |
|
572 |
- ptr_y + ctx->dct_y_offset, ctx->m.linesize); |
|
573 |
- dsp->get_pixels(ctx->blocks[5], |
|
574 |
- ptr_y + ctx->dct_y_offset + bw, ctx->m.linesize); |
|
575 |
- dsp->get_pixels(ctx->blocks[6], |
|
576 |
- ptr_u + ctx->dct_uv_offset, ctx->m.uvlinesize); |
|
577 |
- dsp->get_pixels(ctx->blocks[7], |
|
578 |
- ptr_v + ctx->dct_uv_offset, ctx->m.uvlinesize); |
|
571 |
+ pdsp->get_pixels(ctx->blocks[4], |
|
572 |
+ ptr_y + ctx->dct_y_offset, ctx->m.linesize); |
|
573 |
+ pdsp->get_pixels(ctx->blocks[5], |
|
574 |
+ ptr_y + ctx->dct_y_offset + bw, ctx->m.linesize); |
|
575 |
+ pdsp->get_pixels(ctx->blocks[6], |
|
576 |
+ ptr_u + ctx->dct_uv_offset, ctx->m.uvlinesize); |
|
577 |
+ pdsp->get_pixels(ctx->blocks[7], |
|
578 |
+ ptr_v + ctx->dct_uv_offset, ctx->m.uvlinesize); |
|
579 | 579 |
} |
580 | 580 |
} |
581 | 581 |
|
... | ... |
@@ -35,13 +35,6 @@ |
35 | 35 |
|
36 | 36 |
uint32_t ff_square_tab[512] = { 0, }; |
37 | 37 |
|
38 |
-#define BIT_DEPTH 16 |
|
39 |
-#include "dsputilenc_template.c" |
|
40 |
-#undef BIT_DEPTH |
|
41 |
- |
|
42 |
-#define BIT_DEPTH 8 |
|
43 |
-#include "dsputilenc_template.c" |
|
44 |
- |
|
45 | 38 |
static int sse4_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, |
46 | 39 |
int line_size, int h) |
47 | 40 |
{ |
... | ... |
@@ -110,27 +103,6 @@ static int sse16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, |
110 | 110 |
return s; |
111 | 111 |
} |
112 | 112 |
|
113 |
-static void diff_pixels_c(int16_t *restrict block, const uint8_t *s1, |
|
114 |
- const uint8_t *s2, int stride) |
|
115 |
-{ |
|
116 |
- int i; |
|
117 |
- |
|
118 |
- /* read the pixels */ |
|
119 |
- for (i = 0; i < 8; i++) { |
|
120 |
- block[0] = s1[0] - s2[0]; |
|
121 |
- block[1] = s1[1] - s2[1]; |
|
122 |
- block[2] = s1[2] - s2[2]; |
|
123 |
- block[3] = s1[3] - s2[3]; |
|
124 |
- block[4] = s1[4] - s2[4]; |
|
125 |
- block[5] = s1[5] - s2[5]; |
|
126 |
- block[6] = s1[6] - s2[6]; |
|
127 |
- block[7] = s1[7] - s2[7]; |
|
128 |
- s1 += stride; |
|
129 |
- s2 += stride; |
|
130 |
- block += 8; |
|
131 |
- } |
|
132 |
-} |
|
133 |
- |
|
134 | 113 |
static int sum_abs_dctelem_c(int16_t *block) |
135 | 114 |
{ |
136 | 115 |
int sum = 0, i; |
... | ... |
@@ -577,7 +549,7 @@ static int dct_sad8x8_c(MpegEncContext *s, uint8_t *src1, |
577 | 577 |
|
578 | 578 |
assert(h == 8); |
579 | 579 |
|
580 |
- s->dsp.diff_pixels(temp, src1, src2, stride); |
|
580 |
+ s->pdsp.diff_pixels(temp, src1, src2, stride); |
|
581 | 581 |
s->fdsp.fdct(temp); |
582 | 582 |
return s->dsp.sum_abs_dctelem(temp); |
583 | 583 |
} |
... | ... |
@@ -617,7 +589,7 @@ static int dct264_sad8x8_c(MpegEncContext *s, uint8_t *src1, |
617 | 617 |
int16_t dct[8][8]; |
618 | 618 |
int i, sum = 0; |
619 | 619 |
|
620 |
- s->dsp.diff_pixels(dct[0], src1, src2, stride); |
|
620 |
+ s->pdsp.diff_pixels(dct[0], src1, src2, stride); |
|
621 | 621 |
|
622 | 622 |
#define SRC(x) dct[i][x] |
623 | 623 |
#define DST(x, v) dct[i][x] = v |
... | ... |
@@ -644,7 +616,7 @@ static int dct_max8x8_c(MpegEncContext *s, uint8_t *src1, |
644 | 644 |
|
645 | 645 |
assert(h == 8); |
646 | 646 |
|
647 |
- s->dsp.diff_pixels(temp, src1, src2, stride); |
|
647 |
+ s->pdsp.diff_pixels(temp, src1, src2, stride); |
|
648 | 648 |
s->fdsp.fdct(temp); |
649 | 649 |
|
650 | 650 |
for (i = 0; i < 64; i++) |
... | ... |
@@ -663,7 +635,7 @@ static int quant_psnr8x8_c(MpegEncContext *s, uint8_t *src1, |
663 | 663 |
assert(h == 8); |
664 | 664 |
s->mb_intra = 0; |
665 | 665 |
|
666 |
- s->dsp.diff_pixels(temp, src1, src2, stride); |
|
666 |
+ s->pdsp.diff_pixels(temp, src1, src2, stride); |
|
667 | 667 |
|
668 | 668 |
memcpy(bak, temp, 64 * sizeof(int16_t)); |
669 | 669 |
|
... | ... |
@@ -694,7 +666,7 @@ static int rd8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2, |
694 | 694 |
copy_block8(lsrc1, src1, 8, stride, 8); |
695 | 695 |
copy_block8(lsrc2, src2, 8, stride, 8); |
696 | 696 |
|
697 |
- s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8); |
|
697 |
+ s->pdsp.diff_pixels(temp, lsrc1, lsrc2, 8); |
|
698 | 698 |
|
699 | 699 |
s->block_last_index[0 /* FIXME */] = |
700 | 700 |
last = |
... | ... |
@@ -766,7 +738,7 @@ static int bit8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2, |
766 | 766 |
|
767 | 767 |
assert(h == 8); |
768 | 768 |
|
769 |
- s->dsp.diff_pixels(temp, src1, src2, stride); |
|
769 |
+ s->pdsp.diff_pixels(temp, src1, src2, stride); |
|
770 | 770 |
|
771 | 771 |
s->block_last_index[0 /* FIXME */] = |
772 | 772 |
last = |
... | ... |
@@ -932,8 +904,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) |
932 | 932 |
{ |
933 | 933 |
const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8; |
934 | 934 |
|
935 |
- c->diff_pixels = diff_pixels_c; |
|
936 |
- |
|
937 | 935 |
c->sum_abs_dctelem = sum_abs_dctelem_c; |
938 | 936 |
|
939 | 937 |
/* TODO [0] 16 [1] 8 */ |
... | ... |
@@ -975,16 +945,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) |
975 | 975 |
c->nsse[0] = nsse16_c; |
976 | 976 |
c->nsse[1] = nsse8_c; |
977 | 977 |
|
978 |
- switch (avctx->bits_per_raw_sample) { |
|
979 |
- case 9: |
|
980 |
- case 10: |
|
981 |
- c->get_pixels = get_pixels_16_c; |
|
982 |
- break; |
|
983 |
- default: |
|
984 |
- c->get_pixels = get_pixels_8_c; |
|
985 |
- break; |
|
986 |
- } |
|
987 |
- |
|
988 | 978 |
if (ARCH_ARM) |
989 | 979 |
ff_dsputil_init_arm(c, avctx, high_bit_depth); |
990 | 980 |
if (ARCH_PPC) |
... | ... |
@@ -48,14 +48,6 @@ typedef int (*me_cmp_func)(struct MpegEncContext *c, |
48 | 48 |
* DSPContext. |
49 | 49 |
*/ |
50 | 50 |
typedef struct DSPContext { |
51 |
- /* pixel ops : interface with DCT */ |
|
52 |
- void (*get_pixels)(int16_t *block /* align 16 */, |
|
53 |
- const uint8_t *pixels /* align 8 */, |
|
54 |
- int line_size); |
|
55 |
- void (*diff_pixels)(int16_t *block /* align 16 */, |
|
56 |
- const uint8_t *s1 /* align 8 */, |
|
57 |
- const uint8_t *s2 /* align 8 */, |
|
58 |
- int stride); |
|
59 | 51 |
int (*sum_abs_dctelem)(int16_t *block /* align 16 */); |
60 | 52 |
|
61 | 53 |
me_cmp_func sad[6]; /* identical to pix_absAxA except additional void * */ |
62 | 54 |
deleted file mode 100644 |
... | ... |
@@ -1,51 +0,0 @@ |
1 |
-/* |
|
2 |
- * DSP utils |
|
3 |
- * Copyright (c) 2000, 2001 Fabrice Bellard |
|
4 |
- * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> |
|
5 |
- * |
|
6 |
- * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at> |
|
7 |
- * |
|
8 |
- * This file is part of Libav. |
|
9 |
- * |
|
10 |
- * Libav is free software; you can redistribute it and/or |
|
11 |
- * modify it under the terms of the GNU Lesser General Public |
|
12 |
- * License as published by the Free Software Foundation; either |
|
13 |
- * version 2.1 of the License, or (at your option) any later version. |
|
14 |
- * |
|
15 |
- * Libav is distributed in the hope that it will be useful, |
|
16 |
- * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
17 |
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
18 |
- * Lesser General Public License for more details. |
|
19 |
- * |
|
20 |
- * You should have received a copy of the GNU Lesser General Public |
|
21 |
- * License along with Libav; if not, write to the Free Software |
|
22 |
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
23 |
- */ |
|
24 |
- |
|
25 |
-/** |
|
26 |
- * @file |
|
27 |
- * DSP utils |
|
28 |
- */ |
|
29 |
- |
|
30 |
-#include "bit_depth_template.c" |
|
31 |
- |
|
32 |
-static void FUNCC(get_pixels)(int16_t *restrict block, const uint8_t *_pixels, |
|
33 |
- int line_size) |
|
34 |
-{ |
|
35 |
- const pixel *pixels = (const pixel *) _pixels; |
|
36 |
- int i; |
|
37 |
- |
|
38 |
- /* read the pixels */ |
|
39 |
- for (i = 0; i < 8; i++) { |
|
40 |
- block[0] = pixels[0]; |
|
41 |
- block[1] = pixels[1]; |
|
42 |
- block[2] = pixels[2]; |
|
43 |
- block[3] = pixels[3]; |
|
44 |
- block[4] = pixels[4]; |
|
45 |
- block[5] = pixels[5]; |
|
46 |
- block[6] = pixels[6]; |
|
47 |
- block[7] = pixels[7]; |
|
48 |
- pixels += line_size / sizeof(pixel); |
|
49 |
- block += 8; |
|
50 |
- } |
|
51 |
-} |
... | ... |
@@ -31,6 +31,7 @@ |
31 | 31 |
#include "dsputil.h" |
32 | 32 |
#include "fdctdsp.h" |
33 | 33 |
#include "internal.h" |
34 |
+#include "pixblockdsp.h" |
|
34 | 35 |
#include "put_bits.h" |
35 | 36 |
#include "dv.h" |
36 | 37 |
#include "dv_tablegen.h" |
... | ... |
@@ -41,6 +42,7 @@ static av_cold int dvvideo_encode_init(AVCodecContext *avctx) |
41 | 41 |
DVVideoContext *s = avctx->priv_data; |
42 | 42 |
DSPContext dsp; |
43 | 43 |
FDCTDSPContext fdsp; |
44 |
+ PixblockDSPContext pdsp; |
|
44 | 45 |
int ret; |
45 | 46 |
|
46 | 47 |
s->sys = avpriv_dv_codec_profile(avctx); |
... | ... |
@@ -65,9 +67,10 @@ static av_cold int dvvideo_encode_init(AVCodecContext *avctx) |
65 | 65 |
|
66 | 66 |
ff_dsputil_init(&dsp, avctx); |
67 | 67 |
ff_fdctdsp_init(&fdsp, avctx); |
68 |
+ ff_pixblockdsp_init(&pdsp, avctx); |
|
68 | 69 |
ff_set_cmp(&dsp, dsp.ildct_cmp, avctx->ildct_cmp); |
69 | 70 |
|
70 |
- s->get_pixels = dsp.get_pixels; |
|
71 |
+ s->get_pixels = pdsp.get_pixels; |
|
71 | 72 |
s->ildct_cmp = dsp.ildct_cmp[5]; |
72 | 73 |
|
73 | 74 |
s->fdct[0] = fdsp.fdct; |
... | ... |
@@ -39,6 +39,7 @@ |
39 | 39 |
#include "idctdsp.h" |
40 | 40 |
#include "mpegvideodsp.h" |
41 | 41 |
#include "mpegvideoencdsp.h" |
42 |
+#include "pixblockdsp.h" |
|
42 | 43 |
#include "put_bits.h" |
43 | 44 |
#include "ratecontrol.h" |
44 | 45 |
#include "parser.h" |
... | ... |
@@ -361,6 +362,7 @@ typedef struct MpegEncContext { |
361 | 361 |
IDCTDSPContext idsp; |
362 | 362 |
MpegVideoDSPContext mdsp; |
363 | 363 |
MpegvideoEncDSPContext mpvencdsp; |
364 |
+ PixblockDSPContext pdsp; |
|
364 | 365 |
QpelDSPContext qdsp; |
365 | 366 |
VideoDSPContext vdsp; |
366 | 367 |
H263DSPContext h263dsp; |
... | ... |
@@ -37,7 +37,6 @@ |
37 | 37 |
#include "libavutil/timer.h" |
38 | 38 |
#include "avcodec.h" |
39 | 39 |
#include "dct.h" |
40 |
-#include "dsputil.h" |
|
41 | 40 |
#include "idctdsp.h" |
42 | 41 |
#include "mpeg12.h" |
43 | 42 |
#include "mpegvideo.h" |
... | ... |
@@ -48,6 +47,7 @@ |
48 | 48 |
#include "mpegutils.h" |
49 | 49 |
#include "mjpegenc.h" |
50 | 50 |
#include "msmpeg4.h" |
51 |
+#include "pixblockdsp.h" |
|
51 | 52 |
#include "qpeldsp.h" |
52 | 53 |
#include "faandct.h" |
53 | 54 |
#include "thread.h" |
... | ... |
@@ -703,6 +703,7 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx) |
703 | 703 |
|
704 | 704 |
ff_fdctdsp_init(&s->fdsp, avctx); |
705 | 705 |
ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx); |
706 |
+ ff_pixblockdsp_init(&s->pdsp, avctx); |
|
706 | 707 |
ff_qpeldsp_init(&s->qdsp); |
707 | 708 |
|
708 | 709 |
s->avctx->coded_frame = s->current_picture.f; |
... | ... |
@@ -1943,22 +1944,22 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s, |
1943 | 1943 |
} |
1944 | 1944 |
} |
1945 | 1945 |
|
1946 |
- s->dsp.get_pixels(s->block[0], ptr_y , wrap_y); |
|
1947 |
- s->dsp.get_pixels(s->block[1], ptr_y + 8 , wrap_y); |
|
1948 |
- s->dsp.get_pixels(s->block[2], ptr_y + dct_offset , wrap_y); |
|
1949 |
- s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y); |
|
1946 |
+ s->pdsp.get_pixels(s->block[0], ptr_y, wrap_y); |
|
1947 |
+ s->pdsp.get_pixels(s->block[1], ptr_y + 8, wrap_y); |
|
1948 |
+ s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset, wrap_y); |
|
1949 |
+ s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y); |
|
1950 | 1950 |
|
1951 | 1951 |
if (s->flags & CODEC_FLAG_GRAY) { |
1952 | 1952 |
skip_dct[4] = 1; |
1953 | 1953 |
skip_dct[5] = 1; |
1954 | 1954 |
} else { |
1955 |
- s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c); |
|
1956 |
- s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c); |
|
1955 |
+ s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c); |
|
1956 |
+ s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c); |
|
1957 | 1957 |
if (!s->chroma_y_shift) { /* 422 */ |
1958 |
- s->dsp.get_pixels(s->block[6], |
|
1959 |
- ptr_cb + (dct_offset >> 1), wrap_c); |
|
1960 |
- s->dsp.get_pixels(s->block[7], |
|
1961 |
- ptr_cr + (dct_offset >> 1), wrap_c); |
|
1958 |
+ s->pdsp.get_pixels(s->block[6], |
|
1959 |
+ ptr_cb + (dct_offset >> 1), wrap_c); |
|
1960 |
+ s->pdsp.get_pixels(s->block[7], |
|
1961 |
+ ptr_cr + (dct_offset >> 1), wrap_c); |
|
1962 | 1962 |
} |
1963 | 1963 |
} |
1964 | 1964 |
} else { |
... | ... |
@@ -2024,24 +2025,24 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s, |
2024 | 2024 |
} |
2025 | 2025 |
} |
2026 | 2026 |
|
2027 |
- s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y); |
|
2028 |
- s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y); |
|
2029 |
- s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset, |
|
2030 |
- dest_y + dct_offset, wrap_y); |
|
2031 |
- s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, |
|
2032 |
- dest_y + dct_offset + 8, wrap_y); |
|
2027 |
+ s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y); |
|
2028 |
+ s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y); |
|
2029 |
+ s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset, |
|
2030 |
+ dest_y + dct_offset, wrap_y); |
|
2031 |
+ s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, |
|
2032 |
+ dest_y + dct_offset + 8, wrap_y); |
|
2033 | 2033 |
|
2034 | 2034 |
if (s->flags & CODEC_FLAG_GRAY) { |
2035 | 2035 |
skip_dct[4] = 1; |
2036 | 2036 |
skip_dct[5] = 1; |
2037 | 2037 |
} else { |
2038 |
- s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c); |
|
2039 |
- s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c); |
|
2038 |
+ s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c); |
|
2039 |
+ s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c); |
|
2040 | 2040 |
if (!s->chroma_y_shift) { /* 422 */ |
2041 |
- s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1), |
|
2042 |
- dest_cb + (dct_offset >> 1), wrap_c); |
|
2043 |
- s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1), |
|
2044 |
- dest_cr + (dct_offset >> 1), wrap_c); |
|
2041 |
+ s->pdsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1), |
|
2042 |
+ dest_cb + (dct_offset >> 1), wrap_c); |
|
2043 |
+ s->pdsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1), |
|
2044 |
+ dest_cr + (dct_offset >> 1), wrap_c); |
|
2045 | 2045 |
} |
2046 | 2046 |
} |
2047 | 2047 |
/* pre quantization */ |
2048 | 2048 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,76 @@ |
0 |
+/* |
|
1 |
+ * This file is part of Libav. |
|
2 |
+ * |
|
3 |
+ * Libav is free software; you can redistribute it and/or |
|
4 |
+ * modify it under the terms of the GNU Lesser General Public |
|
5 |
+ * License as published by the Free Software Foundation; either |
|
6 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
7 |
+ * |
|
8 |
+ * Libav is distributed in the hope that it will be useful, |
|
9 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
10 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
11 |
+ * Lesser General Public License for more details. |
|
12 |
+ * |
|
13 |
+ * You should have received a copy of the GNU Lesser General Public |
|
14 |
+ * License along with Libav; if not, write to the Free Software |
|
15 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
16 |
+ */ |
|
17 |
+ |
|
18 |
+#include <stdint.h> |
|
19 |
+ |
|
20 |
+#include "config.h" |
|
21 |
+#include "libavutil/attributes.h" |
|
22 |
+#include "avcodec.h" |
|
23 |
+#include "pixblockdsp.h" |
|
24 |
+ |
|
25 |
+#define BIT_DEPTH 16 |
|
26 |
+#include "pixblockdsp_template.c" |
|
27 |
+#undef BIT_DEPTH |
|
28 |
+ |
|
29 |
+#define BIT_DEPTH 8 |
|
30 |
+#include "pixblockdsp_template.c" |
|
31 |
+ |
|
32 |
/**
 * Portable C fallback: store the sample-wise difference s1 - s2 of two
 * 8x8 blocks of 8-bit samples.
 *
 * @param block  receives the 64 differences, row after row (8 per row)
 * @param s1     first sample of the block that is subtracted from
 * @param s2     first sample of the block that is subtracted
 * @param stride step, in bytes, from one row to the next in both s1 and s2
 */
static void diff_pixels_c(int16_t *restrict block, const uint8_t *s1,
                          const uint8_t *s2, int stride)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        /* operands are promoted to int, so the result may be negative */
        for (col = 0; col < 8; col++)
            block[col] = s1[col] - s2[col];

        s1    += stride;
        s2    += stride;
        block += 8;
    }
}
|
52 |
+ |
|
53 |
+av_cold void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx) |
|
54 |
+{ |
|
55 |
+ const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8; |
|
56 |
+ |
|
57 |
+ c->diff_pixels = diff_pixels_c; |
|
58 |
+ |
|
59 |
+ switch (avctx->bits_per_raw_sample) { |
|
60 |
+ case 9: |
|
61 |
+ case 10: |
|
62 |
+ c->get_pixels = get_pixels_16_c; |
|
63 |
+ break; |
|
64 |
+ default: |
|
65 |
+ c->get_pixels = get_pixels_8_c; |
|
66 |
+ break; |
|
67 |
+ } |
|
68 |
+ |
|
69 |
+ if (ARCH_ARM) |
|
70 |
+ ff_pixblockdsp_init_arm(c, avctx, high_bit_depth); |
|
71 |
+ if (ARCH_PPC) |
|
72 |
+ ff_pixblockdsp_init_ppc(c, avctx, high_bit_depth); |
|
73 |
+ if (ARCH_X86) |
|
74 |
+ ff_pixblockdsp_init_x86(c, avctx, high_bit_depth); |
|
75 |
+} |
0 | 76 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,44 @@ |
0 |
+/* |
|
1 |
+ * This file is part of Libav. |
|
2 |
+ * |
|
3 |
+ * Libav is free software; you can redistribute it and/or |
|
4 |
+ * modify it under the terms of the GNU Lesser General Public |
|
5 |
+ * License as published by the Free Software Foundation; either |
|
6 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
7 |
+ * |
|
8 |
+ * Libav is distributed in the hope that it will be useful, |
|
9 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
10 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
11 |
+ * Lesser General Public License for more details. |
|
12 |
+ * |
|
13 |
+ * You should have received a copy of the GNU Lesser General Public |
|
14 |
+ * License along with Libav; if not, write to the Free Software |
|
15 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
16 |
+ */ |
|
17 |
+ |
|
18 |
+#ifndef AVCODEC_PIXBLOCKDSP_H |
|
19 |
+#define AVCODEC_PIXBLOCKDSP_H |
|
20 |
+ |
|
21 |
+#include <stdint.h> |
|
22 |
+ |
|
23 |
+#include "avcodec.h" |
|
24 |
+ |
|
25 |
/**
 * Table of routines that move pixel blocks into int16_t buffers.
 * The alignment notes next to the parameters are the requirements callers
 * are expected to meet.
 */
typedef struct PixblockDSPContext {
    /**
     * Copy a block of pixels into block.
     * line_size is the step between source rows.
     */
    void (*get_pixels)(int16_t *block /* align 16 */,
                       const uint8_t *pixels /* align 8 */,
                       int line_size);

    /**
     * Store the difference s1 - s2 of two pixel blocks into block.
     * stride is the step between rows of both sources.
     */
    void (*diff_pixels)(int16_t *block /* align 16 */,
                        const uint8_t *s1 /* align 8 */,
                        const uint8_t *s2 /* align 8 */,
                        int stride);
} PixblockDSPContext;
|
34 |
+ |
|
35 |
+void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx); |
|
36 |
+void ff_pixblockdsp_init_arm(PixblockDSPContext *c, AVCodecContext *avctx, |
|
37 |
+ unsigned high_bit_depth); |
|
38 |
+void ff_pixblockdsp_init_ppc(PixblockDSPContext *c, AVCodecContext *avctx, |
|
39 |
+ unsigned high_bit_depth); |
|
40 |
+void ff_pixblockdsp_init_x86(PixblockDSPContext *c, AVCodecContext *avctx, |
|
41 |
+ unsigned high_bit_depth); |
|
42 |
+ |
|
43 |
+#endif /* AVCODEC_PIXBLOCKDSP_H */ |
0 | 44 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,40 @@ |
0 |
+/* |
|
1 |
+ * This file is part of Libav. |
|
2 |
+ * |
|
3 |
+ * Libav is free software; you can redistribute it and/or |
|
4 |
+ * modify it under the terms of the GNU Lesser General Public |
|
5 |
+ * License as published by the Free Software Foundation; either |
|
6 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
7 |
+ * |
|
8 |
+ * Libav is distributed in the hope that it will be useful, |
|
9 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
10 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
11 |
+ * Lesser General Public License for more details. |
|
12 |
+ * |
|
13 |
+ * You should have received a copy of the GNU Lesser General Public |
|
14 |
+ * License along with Libav; if not, write to the Free Software |
|
15 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
16 |
+ */ |
|
17 |
+ |
|
18 |
+#include "bit_depth_template.c" |
|
19 |
+ |
|
20 |
+static void FUNCC(get_pixels)(int16_t *restrict block, const uint8_t *_pixels, |
|
21 |
+ int line_size) |
|
22 |
+{ |
|
23 |
+ const pixel *pixels = (const pixel *) _pixels; |
|
24 |
+ int i; |
|
25 |
+ |
|
26 |
+ /* read the pixels */ |
|
27 |
+ for (i = 0; i < 8; i++) { |
|
28 |
+ block[0] = pixels[0]; |
|
29 |
+ block[1] = pixels[1]; |
|
30 |
+ block[2] = pixels[2]; |
|
31 |
+ block[3] = pixels[3]; |
|
32 |
+ block[4] = pixels[4]; |
|
33 |
+ block[5] = pixels[5]; |
|
34 |
+ block[6] = pixels[6]; |
|
35 |
+ block[7] = pixels[7]; |
|
36 |
+ pixels += line_size / sizeof(pixel); |
|
37 |
+ block += 8; |
|
38 |
+ } |
|
39 |
+} |
... | ... |
@@ -15,6 +15,7 @@ OBJS-$(CONFIG_MPEGAUDIODSP) += ppc/mpegaudiodsp_altivec.o |
15 | 15 |
OBJS-$(CONFIG_MPEGVIDEO) += ppc/mpegvideo_altivec.o \ |
16 | 16 |
ppc/mpegvideodsp.o |
17 | 17 |
OBJS-$(CONFIG_MPEGVIDEOENC) += ppc/mpegvideoencdsp.o |
18 |
+OBJS-$(CONFIG_PIXBLOCKDSP) += ppc/pixblockdsp.o |
|
18 | 19 |
OBJS-$(CONFIG_VIDEODSP) += ppc/videodsp_ppc.o |
19 | 20 |
OBJS-$(CONFIG_VP3DSP) += ppc/vp3dsp_altivec.o |
20 | 21 |
|
... | ... |
@@ -402,105 +402,6 @@ static int sse16_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, |
402 | 402 |
return s; |
403 | 403 |
} |
404 | 404 |
|
405 |
-static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels, |
|
406 |
- int line_size) |
|
407 |
-{ |
|
408 |
- int i; |
|
409 |
- vector unsigned char perm = vec_lvsl(0, pixels); |
|
410 |
- const vector unsigned char zero = |
|
411 |
- (const vector unsigned char) vec_splat_u8(0); |
|
412 |
- |
|
413 |
- for (i = 0; i < 8; i++) { |
|
414 |
- /* Read potentially unaligned pixels. |
|
415 |
- * We're reading 16 pixels, and actually only want 8, |
|
416 |
- * but we simply ignore the extras. */ |
|
417 |
- vector unsigned char pixl = vec_ld(0, pixels); |
|
418 |
- vector unsigned char pixr = vec_ld(7, pixels); |
|
419 |
- vector unsigned char bytes = vec_perm(pixl, pixr, perm); |
|
420 |
- |
|
421 |
- // Convert the bytes into shorts. |
|
422 |
- vector signed short shorts = (vector signed short) vec_mergeh(zero, |
|
423 |
- bytes); |
|
424 |
- |
|
425 |
- // Save the data to the block, we assume the block is 16-byte aligned. |
|
426 |
- vec_st(shorts, i * 16, (vector signed short *) block); |
|
427 |
- |
|
428 |
- pixels += line_size; |
|
429 |
- } |
|
430 |
-} |
|
431 |
- |
|
432 |
-static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1, |
|
433 |
- const uint8_t *s2, int stride) |
|
434 |
-{ |
|
435 |
- int i; |
|
436 |
- vector unsigned char perm1 = vec_lvsl(0, s1); |
|
437 |
- vector unsigned char perm2 = vec_lvsl(0, s2); |
|
438 |
- const vector unsigned char zero = |
|
439 |
- (const vector unsigned char) vec_splat_u8(0); |
|
440 |
- vector signed short shorts1, shorts2; |
|
441 |
- |
|
442 |
- for (i = 0; i < 4; i++) { |
|
443 |
- /* Read potentially unaligned pixels. |
|
444 |
- * We're reading 16 pixels, and actually only want 8, |
|
445 |
- * but we simply ignore the extras. */ |
|
446 |
- vector unsigned char pixl = vec_ld(0, s1); |
|
447 |
- vector unsigned char pixr = vec_ld(15, s1); |
|
448 |
- vector unsigned char bytes = vec_perm(pixl, pixr, perm1); |
|
449 |
- |
|
450 |
- // Convert the bytes into shorts. |
|
451 |
- shorts1 = (vector signed short) vec_mergeh(zero, bytes); |
|
452 |
- |
|
453 |
- // Do the same for the second block of pixels. |
|
454 |
- pixl = vec_ld(0, s2); |
|
455 |
- pixr = vec_ld(15, s2); |
|
456 |
- bytes = vec_perm(pixl, pixr, perm2); |
|
457 |
- |
|
458 |
- // Convert the bytes into shorts. |
|
459 |
- shorts2 = (vector signed short) vec_mergeh(zero, bytes); |
|
460 |
- |
|
461 |
- // Do the subtraction. |
|
462 |
- shorts1 = vec_sub(shorts1, shorts2); |
|
463 |
- |
|
464 |
- // Save the data to the block, we assume the block is 16-byte aligned. |
|
465 |
- vec_st(shorts1, 0, (vector signed short *) block); |
|
466 |
- |
|
467 |
- s1 += stride; |
|
468 |
- s2 += stride; |
|
469 |
- block += 8; |
|
470 |
- |
|
471 |
- /* The code below is a copy of the code above... |
|
472 |
- * This is a manual unroll. */ |
|
473 |
- |
|
474 |
- /* Read potentially unaligned pixels. |
|
475 |
- * We're reading 16 pixels, and actually only want 8, |
|
476 |
- * but we simply ignore the extras. */ |
|
477 |
- pixl = vec_ld(0, s1); |
|
478 |
- pixr = vec_ld(15, s1); |
|
479 |
- bytes = vec_perm(pixl, pixr, perm1); |
|
480 |
- |
|
481 |
- // Convert the bytes into shorts. |
|
482 |
- shorts1 = (vector signed short) vec_mergeh(zero, bytes); |
|
483 |
- |
|
484 |
- // Do the same for the second block of pixels. |
|
485 |
- pixl = vec_ld(0, s2); |
|
486 |
- pixr = vec_ld(15, s2); |
|
487 |
- bytes = vec_perm(pixl, pixr, perm2); |
|
488 |
- |
|
489 |
- // Convert the bytes into shorts. |
|
490 |
- shorts2 = (vector signed short) vec_mergeh(zero, bytes); |
|
491 |
- |
|
492 |
- // Do the subtraction. |
|
493 |
- shorts1 = vec_sub(shorts1, shorts2); |
|
494 |
- |
|
495 |
- // Save the data to the block, we assume the block is 16-byte aligned. |
|
496 |
- vec_st(shorts1, 0, (vector signed short *) block); |
|
497 |
- |
|
498 |
- s1 += stride; |
|
499 |
- s2 += stride; |
|
500 |
- block += 8; |
|
501 |
- } |
|
502 |
-} |
|
503 |
- |
|
504 | 405 |
static int hadamard8_diff8x8_altivec(MpegEncContext *s, uint8_t *dst, |
505 | 406 |
uint8_t *src, int stride, int h) |
506 | 407 |
{ |
... | ... |
@@ -854,12 +755,6 @@ av_cold void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx, |
854 | 854 |
c->sse[0] = sse16_altivec; |
855 | 855 |
c->sse[1] = sse8_altivec; |
856 | 856 |
|
857 |
- c->diff_pixels = diff_pixels_altivec; |
|
858 |
- |
|
859 |
- if (!high_bit_depth) { |
|
860 |
- c->get_pixels = get_pixels_altivec; |
|
861 |
- } |
|
862 |
- |
|
863 | 857 |
c->hadamard8_diff[0] = hadamard8_diff16_altivec; |
864 | 858 |
c->hadamard8_diff[1] = hadamard8_diff8x8_altivec; |
865 | 859 |
} |
866 | 860 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,153 @@ |
0 |
+/* |
|
1 |
+ * Copyright (c) 2002 Brian Foley |
|
2 |
+ * Copyright (c) 2002 Dieter Shirley |
|
3 |
+ * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org> |
|
4 |
+ * |
|
5 |
+ * This file is part of Libav. |
|
6 |
+ * |
|
7 |
+ * Libav is free software; you can redistribute it and/or |
|
8 |
+ * modify it under the terms of the GNU Lesser General Public |
|
9 |
+ * License as published by the Free Software Foundation; either |
|
10 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
11 |
+ * |
|
12 |
+ * Libav is distributed in the hope that it will be useful, |
|
13 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
14 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
15 |
+ * Lesser General Public License for more details. |
|
16 |
+ * |
|
17 |
+ * You should have received a copy of the GNU Lesser General Public |
|
18 |
+ * License along with Libav; if not, write to the Free Software |
|
19 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
20 |
+ */ |
|
21 |
+ |
|
22 |
+#include "config.h" |
|
23 |
+#if HAVE_ALTIVEC_H |
|
24 |
+#include <altivec.h> |
|
25 |
+#endif |
|
26 |
+ |
|
27 |
+#include "libavutil/attributes.h" |
|
28 |
+#include "libavutil/cpu.h" |
|
29 |
+#include "libavutil/ppc/cpu.h" |
|
30 |
+#include "libavutil/ppc/types_altivec.h" |
|
31 |
+#include "libavutil/ppc/util_altivec.h" |
|
32 |
+#include "libavcodec/avcodec.h" |
|
33 |
+#include "libavcodec/pixblockdsp.h" |
|
34 |
+ |
|
35 |
+#if HAVE_ALTIVEC |
|
36 |
+ |
|
37 |
+static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels, |
|
38 |
+ int line_size) |
|
39 |
+{ |
|
40 |
+ int i; |
|
41 |
+ vector unsigned char perm = vec_lvsl(0, pixels); |
|
42 |
+ const vector unsigned char zero = |
|
43 |
+ (const vector unsigned char) vec_splat_u8(0); |
|
44 |
+ |
|
45 |
+ for (i = 0; i < 8; i++) { |
|
46 |
+ /* Read potentially unaligned pixels. |
|
47 |
+ * We're reading 16 pixels, and actually only want 8, |
|
48 |
+ * but we simply ignore the extras. */ |
|
49 |
+ vector unsigned char pixl = vec_ld(0, pixels); |
|
50 |
+ vector unsigned char pixr = vec_ld(7, pixels); |
|
51 |
+ vector unsigned char bytes = vec_perm(pixl, pixr, perm); |
|
52 |
+ |
|
53 |
+ // Convert the bytes into shorts. |
|
54 |
+ vector signed short shorts = (vector signed short) vec_mergeh(zero, |
|
55 |
+ bytes); |
|
56 |
+ |
|
57 |
+ // Save the data to the block, we assume the block is 16-byte aligned. |
|
58 |
+ vec_st(shorts, i * 16, (vector signed short *) block); |
|
59 |
+ |
|
60 |
+ pixels += line_size; |
|
61 |
+ } |
|
62 |
+} |
|
63 |
+ |
|
64 |
+static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1, |
|
65 |
+ const uint8_t *s2, int stride) |
|
66 |
+{ |
|
67 |
+ int i; |
|
68 |
+ vector unsigned char perm1 = vec_lvsl(0, s1); |
|
69 |
+ vector unsigned char perm2 = vec_lvsl(0, s2); |
|
70 |
+ const vector unsigned char zero = |
|
71 |
+ (const vector unsigned char) vec_splat_u8(0); |
|
72 |
+ vector signed short shorts1, shorts2; |
|
73 |
+ |
|
74 |
+ for (i = 0; i < 4; i++) { |
|
75 |
+ /* Read potentially unaligned pixels. |
|
76 |
+ * We're reading 16 pixels, and actually only want 8, |
|
77 |
+ * but we simply ignore the extras. */ |
|
78 |
+ vector unsigned char pixl = vec_ld(0, s1); |
|
79 |
+ vector unsigned char pixr = vec_ld(15, s1); |
|
80 |
+ vector unsigned char bytes = vec_perm(pixl, pixr, perm1); |
|
81 |
+ |
|
82 |
+ // Convert the bytes into shorts. |
|
83 |
+ shorts1 = (vector signed short) vec_mergeh(zero, bytes); |
|
84 |
+ |
|
85 |
+ // Do the same for the second block of pixels. |
|
86 |
+ pixl = vec_ld(0, s2); |
|
87 |
+ pixr = vec_ld(15, s2); |
|
88 |
+ bytes = vec_perm(pixl, pixr, perm2); |
|
89 |
+ |
|
90 |
+ // Convert the bytes into shorts. |
|
91 |
+ shorts2 = (vector signed short) vec_mergeh(zero, bytes); |
|
92 |
+ |
|
93 |
+ // Do the subtraction. |
|
94 |
+ shorts1 = vec_sub(shorts1, shorts2); |
|
95 |
+ |
|
96 |
+ // Save the data to the block, we assume the block is 16-byte aligned. |
|
97 |
+ vec_st(shorts1, 0, (vector signed short *) block); |
|
98 |
+ |
|
99 |
+ s1 += stride; |
|
100 |
+ s2 += stride; |
|
101 |
+ block += 8; |
|
102 |
+ |
|
103 |
+ /* The code below is a copy of the code above... |
|
104 |
+ * This is a manual unroll. */ |
|
105 |
+ |
|
106 |
+ /* Read potentially unaligned pixels. |
|
107 |
+ * We're reading 16 pixels, and actually only want 8, |
|
108 |
+ * but we simply ignore the extras. */ |
|
109 |
+ pixl = vec_ld(0, s1); |
|
110 |
+ pixr = vec_ld(15, s1); |
|
111 |
+ bytes = vec_perm(pixl, pixr, perm1); |
|
112 |
+ |
|
113 |
+ // Convert the bytes into shorts. |
|
114 |
+ shorts1 = (vector signed short) vec_mergeh(zero, bytes); |
|
115 |
+ |
|
116 |
+ // Do the same for the second block of pixels. |
|
117 |
+ pixl = vec_ld(0, s2); |
|
118 |
+ pixr = vec_ld(15, s2); |
|
119 |
+ bytes = vec_perm(pixl, pixr, perm2); |
|
120 |
+ |
|
121 |
+ // Convert the bytes into shorts. |
|
122 |
+ shorts2 = (vector signed short) vec_mergeh(zero, bytes); |
|
123 |
+ |
|
124 |
+ // Do the subtraction. |
|
125 |
+ shorts1 = vec_sub(shorts1, shorts2); |
|
126 |
+ |
|
127 |
+ // Save the data to the block, we assume the block is 16-byte aligned. |
|
128 |
+ vec_st(shorts1, 0, (vector signed short *) block); |
|
129 |
+ |
|
130 |
+ s1 += stride; |
|
131 |
+ s2 += stride; |
|
132 |
+ block += 8; |
|
133 |
+ } |
|
134 |
+} |
|
135 |
+ |
|
136 |
+#endif /* HAVE_ALTIVEC */ |
|
137 |
+ |
|
138 |
+av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c, |
|
139 |
+ AVCodecContext *avctx, |
|
140 |
+ unsigned high_bit_depth) |
|
141 |
+{ |
|
142 |
+#if HAVE_ALTIVEC |
|
143 |
+ if (!PPC_ALTIVEC(av_get_cpu_flags())) |
|
144 |
+ return; |
|
145 |
+ |
|
146 |
+ c->diff_pixels = diff_pixels_altivec; |
|
147 |
+ |
|
148 |
+ if (!high_bit_depth) { |
|
149 |
+ c->get_pixels = get_pixels_altivec; |
|
150 |
+ } |
|
151 |
+#endif /* HAVE_ALTIVEC */ |
|
152 |
+} |
... | ... |
@@ -26,6 +26,7 @@ OBJS-$(CONFIG_MPEGVIDEO) += x86/mpegvideo.o \ |
26 | 26 |
x86/mpegvideodsp.o |
27 | 27 |
OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoenc.o \ |
28 | 28 |
x86/mpegvideoencdsp_init.o |
29 |
+OBJS-$(CONFIG_PIXBLOCKDSP) += x86/pixblockdsp_init.o |
|
29 | 30 |
OBJS-$(CONFIG_QPELDSP) += x86/qpeldsp_init.o |
30 | 31 |
OBJS-$(CONFIG_VIDEODSP) += x86/videodsp_init.o |
31 | 32 |
OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp_init.o |
... | ... |
@@ -93,6 +94,7 @@ YASM-OBJS-$(CONFIG_HPELDSP) += x86/fpel.o \ |
93 | 93 |
YASM-OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp.o |
94 | 94 |
YASM-OBJS-$(CONFIG_MPEGAUDIODSP) += x86/imdct36.o |
95 | 95 |
YASM-OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoencdsp.o |
96 |
+YASM-OBJS-$(CONFIG_PIXBLOCKDSP) += x86/pixblockdsp.o |
|
96 | 97 |
YASM-OBJS-$(CONFIG_QPELDSP) += x86/qpeldsp.o \ |
97 | 98 |
x86/fpel.o \ |
98 | 99 |
x86/qpel.o |
... | ... |
@@ -334,87 +334,3 @@ cglobal sse16, 5, 5, 8 |
334 | 334 |
paddd m7, m1 |
335 | 335 |
movd eax, m7 ; return value |
336 | 336 |
RET |
337 |
- |
|
338 |
-INIT_MMX mmx |
|
339 |
-; void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size) |
|
340 |
-cglobal get_pixels, 3,4 |
|
341 |
- movsxdifnidn r2, r2d |
|
342 |
- add r0, 128 |
|
343 |
- mov r3, -128 |
|
344 |
- pxor m7, m7 |
|
345 |
-.loop: |
|
346 |
- mova m0, [r1] |
|
347 |
- mova m2, [r1+r2] |
|
348 |
- mova m1, m0 |
|
349 |
- mova m3, m2 |
|
350 |
- punpcklbw m0, m7 |
|
351 |
- punpckhbw m1, m7 |
|
352 |
- punpcklbw m2, m7 |
|
353 |
- punpckhbw m3, m7 |
|
354 |
- mova [r0+r3+ 0], m0 |
|
355 |
- mova [r0+r3+ 8], m1 |
|
356 |
- mova [r0+r3+16], m2 |
|
357 |
- mova [r0+r3+24], m3 |
|
358 |
- lea r1, [r1+r2*2] |
|
359 |
- add r3, 32 |
|
360 |
- js .loop |
|
361 |
- REP_RET |
|
362 |
- |
|
363 |
-INIT_XMM sse2 |
|
364 |
-cglobal get_pixels, 3, 4 |
|
365 |
- movsxdifnidn r2, r2d |
|
366 |
- lea r3, [r2*3] |
|
367 |
- pxor m4, m4 |
|
368 |
- movh m0, [r1] |
|
369 |
- movh m1, [r1+r2] |
|
370 |
- movh m2, [r1+r2*2] |
|
371 |
- movh m3, [r1+r3] |
|
372 |
- lea r1, [r1+r2*4] |
|
373 |
- punpcklbw m0, m4 |
|
374 |
- punpcklbw m1, m4 |
|
375 |
- punpcklbw m2, m4 |
|
376 |
- punpcklbw m3, m4 |
|
377 |
- mova [r0], m0 |
|
378 |
- mova [r0+0x10], m1 |
|
379 |
- mova [r0+0x20], m2 |
|
380 |
- mova [r0+0x30], m3 |
|
381 |
- movh m0, [r1] |
|
382 |
- movh m1, [r1+r2*1] |
|
383 |
- movh m2, [r1+r2*2] |
|
384 |
- movh m3, [r1+r3] |
|
385 |
- punpcklbw m0, m4 |
|
386 |
- punpcklbw m1, m4 |
|
387 |
- punpcklbw m2, m4 |
|
388 |
- punpcklbw m3, m4 |
|
389 |
- mova [r0+0x40], m0 |
|
390 |
- mova [r0+0x50], m1 |
|
391 |
- mova [r0+0x60], m2 |
|
392 |
- mova [r0+0x70], m3 |
|
393 |
- RET |
|
394 |
- |
|
395 |
-INIT_MMX mmx |
|
396 |
-; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2, |
|
397 |
-; int stride); |
|
398 |
-cglobal diff_pixels, 4,5 |
|
399 |
- movsxdifnidn r3, r3d |
|
400 |
- pxor m7, m7 |
|
401 |
- add r0, 128 |
|
402 |
- mov r4, -128 |
|
403 |
-.loop: |
|
404 |
- mova m0, [r1] |
|
405 |
- mova m2, [r2] |
|
406 |
- mova m1, m0 |
|
407 |
- mova m3, m2 |
|
408 |
- punpcklbw m0, m7 |
|
409 |
- punpckhbw m1, m7 |
|
410 |
- punpcklbw m2, m7 |
|
411 |
- punpckhbw m3, m7 |
|
412 |
- psubw m0, m2 |
|
413 |
- psubw m1, m3 |
|
414 |
- mova [r0+r4+0], m0 |
|
415 |
- mova [r0+r4+8], m1 |
|
416 |
- add r1, r3 |
|
417 |
- add r2, r3 |
|
418 |
- add r4, 16 |
|
419 |
- jne .loop |
|
420 |
- REP_RET |
... | ... |
@@ -30,11 +30,6 @@ |
30 | 30 |
#include "libavcodec/mpegvideo.h" |
31 | 31 |
#include "dsputil_x86.h" |
32 | 32 |
|
33 |
-void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size); |
|
34 |
-void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, int line_size); |
|
35 |
-void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2, |
|
36 |
- int stride); |
|
37 |
- |
|
38 | 33 |
#if HAVE_INLINE_ASM |
39 | 34 |
|
40 | 35 |
static int sse8_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, |
... | ... |
@@ -823,16 +818,6 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx, |
823 | 823 |
{ |
824 | 824 |
int cpu_flags = av_get_cpu_flags(); |
825 | 825 |
|
826 |
- if (EXTERNAL_MMX(cpu_flags)) { |
|
827 |
- if (!high_bit_depth) |
|
828 |
- c->get_pixels = ff_get_pixels_mmx; |
|
829 |
- c->diff_pixels = ff_diff_pixels_mmx; |
|
830 |
- } |
|
831 |
- |
|
832 |
- if (EXTERNAL_SSE2(cpu_flags)) |
|
833 |
- if (!high_bit_depth) |
|
834 |
- c->get_pixels = ff_get_pixels_sse2; |
|
835 |
- |
|
836 | 826 |
#if HAVE_INLINE_ASM |
837 | 827 |
if (INLINE_MMX(cpu_flags)) { |
838 | 828 |
c->sum_abs_dctelem = sum_abs_dctelem_mmx; |
839 | 829 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,110 @@ |
0 |
+;***************************************************************************** |
|
1 |
+;* SIMD-optimized pixel operations |
|
2 |
+;***************************************************************************** |
|
3 |
+;* Copyright (c) 2000, 2001 Fabrice Bellard |
|
4 |
+;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> |
|
5 |
+;* |
|
6 |
+;* This file is part of Libav. |
|
7 |
+;* |
|
8 |
+;* Libav is free software; you can redistribute it and/or |
|
9 |
+;* modify it under the terms of the GNU Lesser General Public |
|
10 |
+;* License as published by the Free Software Foundation; either |
|
11 |
+;* version 2.1 of the License, or (at your option) any later version. |
|
12 |
+;* |
|
13 |
+;* Libav is distributed in the hope that it will be useful, |
|
14 |
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
15 |
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
16 |
+;* Lesser General Public License for more details. |
|
17 |
+;* |
|
18 |
+;* You should have received a copy of the GNU Lesser General Public |
|
19 |
+;* License along with Libav; if not, write to the Free Software |
|
20 |
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
21 |
+;***************************************************************************** |
|
22 |
+ |
|
23 |
+%include "libavutil/x86/x86util.asm" |
|
24 |
+ |
|
25 |
+SECTION .text |
|
26 |
+ |
|
27 |
INIT_MMX mmx
; void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size)
;
; Widens rows of 8 source bytes to words and writes 128 bytes to block.
; r0 = block, r1 = pixels, r2 = line_size, r3 = negative byte index into block.
cglobal get_pixels, 3,4
    movsxdifnidn r2, r2d            ; widen the int line_size where needed
    add          r0, 128            ; r0 -> end of block
    mov          r3, -128           ; index runs from -128 up to 0
    pxor         m7, m7             ; zero, used as the high half when unpacking
.loop:                              ; two source rows per iteration
    mova         m0, [r1]           ; row n
    mova         m2, [r1+r2]        ; row n + 1
    mova         m1, m0
    mova         m3, m2
    punpcklbw    m0, m7             ; low 4 bytes  -> words
    punpckhbw    m1, m7             ; high 4 bytes -> words
    punpcklbw    m2, m7
    punpckhbw    m3, m7
    mova [r0+r3+ 0], m0
    mova [r0+r3+ 8], m1
    mova [r0+r3+16], m2
    mova [r0+r3+24], m3
    lea          r1, [r1+r2*2]      ; advance two rows
    add          r3, 32             ; 32 output bytes written
    js .loop                        ; continue while the index is negative
    REP_RET
|
51 |
+ |
|
52 |
INIT_XMM sse2
; void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, int line_size)
;
; Straight-line version: two groups of four rows. Each row is loaded with
; movh, unpacked against zero and stored as one 16-byte row of block.
; r0 = block, r1 = pixels, r2 = line_size, r3 = 3 * line_size.
cglobal get_pixels, 3, 4
    movsxdifnidn r2, r2d            ; widen the int line_size where needed
    lea          r3, [r2*3]         ; offset of the fourth row of a group
    pxor         m4, m4             ; zero, used as the high half when unpacking
    ; rows 0-3
    movh         m0, [r1]
    movh         m1, [r1+r2]
    movh         m2, [r1+r2*2]
    movh         m3, [r1+r3]
    lea          r1, [r1+r2*4]      ; move on to rows 4-7
    punpcklbw    m0, m4
    punpcklbw    m1, m4
    punpcklbw    m2, m4
    punpcklbw    m3, m4
    mova       [r0], m0
    mova  [r0+0x10], m1
    mova  [r0+0x20], m2
    mova  [r0+0x30], m3
    ; rows 4-7
    movh         m0, [r1]
    movh         m1, [r1+r2*1]
    movh         m2, [r1+r2*2]
    movh         m3, [r1+r3]
    punpcklbw    m0, m4
    punpcklbw    m1, m4
    punpcklbw    m2, m4
    punpcklbw    m3, m4
    mova  [r0+0x40], m0
    mova  [r0+0x50], m1
    mova  [r0+0x60], m2
    mova  [r0+0x70], m3
    RET
|
83 |
+ |
|
84 |
INIT_MMX mmx
; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
;                         int stride);
;
; Writes s1 - s2 as words, one row of 8 samples (16 output bytes) per
; iteration, 128 bytes in total.
; r0 = block, r1 = s1, r2 = s2, r3 = stride, r4 = negative byte index into block.
cglobal diff_pixels, 4,5
    movsxdifnidn r3, r3d            ; widen the int stride where needed
    pxor         m7, m7             ; zero, used as the high half when unpacking
    add          r0,  128           ; r0 -> end of block
    mov          r4, -128           ; index runs from -128 up to 0
.loop:
    mova         m0, [r1]           ; row of s1
    mova         m2, [r2]           ; row of s2
    mova         m1, m0
    mova         m3, m2
    punpcklbw    m0, m7             ; low 4 bytes  -> words
    punpckhbw    m1, m7             ; high 4 bytes -> words
    punpcklbw    m2, m7
    punpckhbw    m3, m7
    psubw        m0, m2             ; s1 - s2, low half
    psubw        m1, m3             ; s1 - s2, high half
    mova  [r0+r4+0], m0
    mova  [r0+r4+8], m1
    add          r1, r3             ; next row of s1
    add          r2, r3             ; next row of s2
    add          r4, 16
    jne .loop                       ; stop once the index reaches zero
    REP_RET
0 | 110 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,47 @@ |
0 |
+/* |
|
1 |
+ * SIMD-optimized pixel operations |
|
2 |
+ * |
|
3 |
+ * This file is part of Libav. |
|
4 |
+ * |
|
5 |
+ * Libav is free software; you can redistribute it and/or |
|
6 |
+ * modify it under the terms of the GNU Lesser General Public |
|
7 |
+ * License as published by the Free Software Foundation; either |
|
8 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
9 |
+ * |
|
10 |
+ * Libav is distributed in the hope that it will be useful, |
|
11 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
12 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
13 |
+ * Lesser General Public License for more details. |
|
14 |
+ * |
|
15 |
+ * You should have received a copy of the GNU Lesser General Public |
|
16 |
+ * License along with Libav; if not, write to the Free Software |
|
17 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
18 |
+ */ |
|
19 |
+ |
|
20 |
+#include "libavutil/attributes.h" |
|
21 |
+#include "libavutil/cpu.h" |
|
22 |
+#include "libavutil/x86/cpu.h" |
|
23 |
+#include "libavcodec/pixblockdsp.h" |
|
24 |
+ |
|
25 |
+void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size); |
|
26 |
+void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, int line_size); |
|
27 |
+void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2, |
|
28 |
+ int stride); |
|
29 |
+ |
|
30 |
+av_cold void ff_pixblockdsp_init_x86(PixblockDSPContext *c, |
|
31 |
+ AVCodecContext *avctx, |
|
32 |
+ unsigned high_bit_depth) |
|
33 |
+{ |
|
34 |
+ int cpu_flags = av_get_cpu_flags(); |
|
35 |
+ |
|
36 |
+ if (EXTERNAL_MMX(cpu_flags)) { |
|
37 |
+ if (!high_bit_depth) |
|
38 |
+ c->get_pixels = ff_get_pixels_mmx; |
|
39 |
+ c->diff_pixels = ff_diff_pixels_mmx; |
|
40 |
+ } |
|
41 |
+ |
|
42 |
+ if (EXTERNAL_SSE2(cpu_flags)) { |
|
43 |
+ if (!high_bit_depth) |
|
44 |
+ c->get_pixels = ff_get_pixels_sse2; |
|
45 |
+ } |
|
46 |
+} |