Browse code

Add x86-optimized versions of exponent_min().

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>

Justin Ruggles authored on 2011/02/11 02:20:36
Showing 8 changed files
... ...
@@ -55,8 +55,10 @@ OBJS-$(CONFIG_AAC_ENCODER)             += aacenc.o aaccoder.o    \
55 55
                                           mpeg4audio.o
56 56
 OBJS-$(CONFIG_AASC_DECODER)            += aasc.o msrledec.o
57 57
 OBJS-$(CONFIG_AC3_DECODER)             += ac3dec.o ac3dec_data.o ac3.o
58
-OBJS-$(CONFIG_AC3_ENCODER)             += ac3enc_float.o ac3tab.o ac3.o
59
-OBJS-$(CONFIG_AC3_FIXED_ENCODER)       += ac3enc_fixed.o ac3tab.o ac3.o
58
+OBJS-$(CONFIG_AC3_ENCODER)             += ac3enc_float.o ac3tab.o ac3.o \
59
+                                          ac3dsp.o
60
+OBJS-$(CONFIG_AC3_FIXED_ENCODER)       += ac3enc_fixed.o ac3tab.o ac3.o \
61
+                                          ac3dsp.o
60 62
 OBJS-$(CONFIG_ALAC_DECODER)            += alac.o
61 63
 OBJS-$(CONFIG_ALAC_ENCODER)            += alacenc.o
62 64
 OBJS-$(CONFIG_ALS_DECODER)             += alsdec.o bgmc.o mpeg4audio.o
63 65
new file mode 100644
... ...
@@ -0,0 +1,51 @@
0
+/*
1
+ * AC-3 DSP utils
2
+ * Copyright (c) 2011 Justin Ruggles
3
+ *
4
+ * This file is part of FFmpeg.
5
+ *
6
+ * FFmpeg is free software; you can redistribute it and/or
7
+ * modify it under the terms of the GNU Lesser General Public
8
+ * License as published by the Free Software Foundation; either
9
+ * version 2.1 of the License, or (at your option) any later version.
10
+ *
11
+ * FFmpeg is distributed in the hope that it will be useful,
12
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
+ * Lesser General Public License for more details.
15
+ *
16
+ * You should have received a copy of the GNU Lesser General Public
17
+ * License along with FFmpeg; if not, write to the Free Software
18
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
+ */
20
+
21
+#include "avcodec.h"
22
+#include "ac3dsp.h"
23
+
24
+static void ac3_exponent_min_c(uint8_t *exp, int num_reuse_blocks, int nb_coefs)
25
+{
26
+    int blk, i;
27
+
28
+    if (!num_reuse_blocks)
29
+        return;
30
+
31
+    for (i = 0; i < nb_coefs; i++) {
32
+        uint8_t min_exp = *exp;
33
+        uint8_t *exp1 = exp + 256;
34
+        for (blk = 0; blk < num_reuse_blocks; blk++) {
35
+            uint8_t next_exp = *exp1;
36
+            if (next_exp < min_exp)
37
+                min_exp = next_exp;
38
+            exp1 += 256;
39
+        }
40
+        *exp++ = min_exp;
41
+    }
42
+}
43
+
44
+av_cold void ff_ac3dsp_init(AC3DSPContext *c)
45
+{
46
+    c->ac3_exponent_min = ac3_exponent_min_c;
47
+
48
+    if (HAVE_MMX)
49
+        ff_ac3dsp_init_x86(c);
50
+}
0 51
new file mode 100644
... ...
@@ -0,0 +1,43 @@
0
+/*
1
+ * AC-3 DSP utils
2
+ * Copyright (c) 2011 Justin Ruggles
3
+ *
4
+ * This file is part of FFmpeg.
5
+ *
6
+ * FFmpeg is free software; you can redistribute it and/or
7
+ * modify it under the terms of the GNU Lesser General Public
8
+ * License as published by the Free Software Foundation; either
9
+ * version 2.1 of the License, or (at your option) any later version.
10
+ *
11
+ * FFmpeg is distributed in the hope that it will be useful,
12
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
+ * Lesser General Public License for more details.
15
+ *
16
+ * You should have received a copy of the GNU Lesser General Public
17
+ * License along with FFmpeg; if not, write to the Free Software
18
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
+ */
20
+
21
+#ifndef AVCODEC_AC3DSP_H
22
+#define AVCODEC_AC3DSP_H
23
+
24
+#include <stdint.h>
25
+
26
+typedef struct AC3DSPContext {
27
+    /**
28
+     * Set each encoded exponent in a block to the minimum of itself and the
29
+     * exponents in the same frequency bin of up to 5 following blocks.
30
+     * @param exp   pointer to the start of the current block of exponents.
31
+     *              constraints: align 16
32
+     * @param num_reuse_blocks  number of blocks that will reuse exponents from the current block.
33
+     *                          constraints: range 0 to 5
34
+     * @param nb_coefs  number of frequency coefficients.
35
+     */
36
+    void (*ac3_exponent_min)(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
37
+} AC3DSPContext;
38
+
39
+void ff_ac3dsp_init    (AC3DSPContext *c);
40
+void ff_ac3dsp_init_x86(AC3DSPContext *c);
41
+
42
+#endif /* AVCODEC_AC3DSP_H */
... ...
@@ -33,6 +33,7 @@
33 33
 #include "avcodec.h"
34 34
 #include "put_bits.h"
35 35
 #include "dsputil.h"
36
+#include "ac3dsp.h"
36 37
 #include "ac3.h"
37 38
 #include "audioconvert.h"
38 39
 
... ...
@@ -86,6 +87,7 @@ typedef struct AC3Block {
86 86
 typedef struct AC3EncodeContext {
87 87
     PutBitContext pb;                       ///< bitstream writer context
88 88
     DSPContext dsp;
89
+    AC3DSPContext ac3dsp;                   ///< AC-3 optimized functions
89 90
     AC3MDCTContext mdct;                    ///< MDCT context
90 91
 
91 92
     AC3Block blocks[AC3_MAX_BLOCKS];        ///< per-block info
... ...
@@ -458,7 +460,6 @@ static void compute_exp_strategy_ch(AC3EncodeContext *s, uint8_t *exp_strategy,
458 458
             exp_strategy[blk] = EXP_REUSE;
459 459
         exp += AC3_MAX_COEFS;
460 460
     }
461
-    emms_c();
462 461
 
463 462
     /* now select the encoding strategy type : if exponents are often
464 463
        recoded, we use a coarse encoding */
... ...
@@ -499,31 +500,6 @@ static void compute_exp_strategy(AC3EncodeContext *s)
499 499
 
500 500
 
501 501
 /**
502
- * Set each encoded exponent in a block to the minimum of itself and the
503
- * exponents in the same frequency bin of up to 5 following blocks.
504
- */
505
-static void exponent_min(uint8_t *exp, int num_reuse_blocks, int nb_coefs)
506
-{
507
-    int blk, i;
508
-
509
-    if (!num_reuse_blocks)
510
-        return;
511
-
512
-    for (i = 0; i < nb_coefs; i++) {
513
-        uint8_t min_exp = *exp;
514
-        uint8_t *exp1 = exp + AC3_MAX_COEFS;
515
-        for (blk = 0; blk < num_reuse_blocks; blk++) {
516
-            uint8_t next_exp = *exp1;
517
-            if (next_exp < min_exp)
518
-                min_exp = next_exp;
519
-            exp1 += AC3_MAX_COEFS;
520
-        }
521
-        *exp++ = min_exp;
522
-    }
523
-}
524
-
525
-
526
-/**
527 502
  * Update the exponents so that they are the ones the decoder will decode.
528 503
  */
529 504
 static void encode_exponents_blk_ch(uint8_t *exp, int nb_exps, int exp_strategy)
... ...
@@ -616,7 +592,7 @@ static void encode_exponents(AC3EncodeContext *s)
616 616
             num_reuse_blocks = blk1 - blk - 1;
617 617
 
618 618
             /* for the EXP_REUSE case we select the min of the exponents */
619
-            exponent_min(exp, num_reuse_blocks, nb_coefs);
619
+            s->ac3dsp.ac3_exponent_min(exp, num_reuse_blocks, nb_coefs);
620 620
 
621 621
             encode_exponents_blk_ch(exp, nb_coefs, exp_strategy[blk]);
622 622
 
... ...
@@ -704,6 +680,8 @@ static void process_exponents(AC3EncodeContext *s)
704 704
     encode_exponents(s);
705 705
 
706 706
     group_exponents(s);
707
+
708
+    emms_c();
707 709
 }
708 710
 
709 711
 
... ...
@@ -1856,6 +1834,7 @@ static av_cold int ac3_encode_init(AVCodecContext *avctx)
1856 1856
     avctx->coded_frame= avcodec_alloc_frame();
1857 1857
 
1858 1858
     dsputil_init(&s->dsp, avctx);
1859
+    ff_ac3dsp_init(&s->ac3dsp);
1859 1860
 
1860 1861
     return 0;
1861 1862
 init_fail:
... ...
@@ -17,6 +17,10 @@ MMX-OBJS-$(CONFIG_H264PRED)            += x86/h264_intrapred_init.o
17 17
 
18 18
 YASM-OBJS-$(CONFIG_VC1_DECODER)        += x86/vc1dsp_yasm.o
19 19
 
20
+MMX-OBJS-$(CONFIG_AC3_ENCODER)         += x86/ac3dsp_mmx.o
21
+MMX-OBJS-$(CONFIG_AC3_FIXED_ENCODER)   += x86/ac3dsp_mmx.o
22
+YASM-OBJS-$(CONFIG_AC3_ENCODER)        += x86/ac3dsp.o
23
+YASM-OBJS-$(CONFIG_AC3_FIXED_ENCODER)  += x86/ac3dsp.o
20 24
 MMX-OBJS-$(CONFIG_CAVS_DECODER)        += x86/cavsdsp_mmx.o
21 25
 MMX-OBJS-$(CONFIG_MP1FLOAT_DECODER)    += x86/mpegaudiodec_mmx.o
22 26
 MMX-OBJS-$(CONFIG_MP2FLOAT_DECODER)    += x86/mpegaudiodec_mmx.o
23 27
new file mode 100644
... ...
@@ -0,0 +1,67 @@
0
+;*****************************************************************************
1
+;* x86-optimized AC-3 DSP utils
2
+;* Copyright (c) 2011 Justin Ruggles
3
+;*
4
+;* This file is part of FFmpeg.
5
+;*
6
+;* FFmpeg is free software; you can redistribute it and/or
7
+;* modify it under the terms of the GNU Lesser General Public
8
+;* License as published by the Free Software Foundation; either
9
+;* version 2.1 of the License, or (at your option) any later version.
10
+;*
11
+;* FFmpeg is distributed in the hope that it will be useful,
12
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
+;* Lesser General Public License for more details.
15
+;*
16
+;* You should have received a copy of the GNU Lesser General Public
17
+;* License along with FFmpeg; if not, write to the Free Software
18
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
+;******************************************************************************
20
+
21
+%include "x86inc.asm"
22
+%include "x86util.asm"
23
+
24
+SECTION .text
25
+
26
+;-----------------------------------------------------------------------------
27
+; void ff_ac3_exponent_min(uint8_t *exp, int num_reuse_blocks, int nb_coefs)
28
+;-----------------------------------------------------------------------------
29
+
30
+%macro AC3_EXPONENT_MIN 1
31
+cglobal ac3_exponent_min_%1, 3,4,2, exp, reuse_blks, expn, offset
32
+    shl  reuse_blksq, 8
33
+    jz .end
34
+    LOOP_ALIGN
35
+.nextexp:
36
+    mov      offsetq, reuse_blksq
37
+    mova          m0, [expq+offsetq]
38
+    sub      offsetq, 256
39
+    LOOP_ALIGN
40
+.nextblk:
41
+    PMINUB        m0, [expq+offsetq], m1
42
+    sub      offsetq, 256
43
+    jae .nextblk
44
+    mova      [expq], m0
45
+    add         expq, mmsize
46
+    sub        expnq, mmsize
47
+    jg .nextexp
48
+.end:
49
+    REP_RET
50
+%endmacro
51
+
52
+%define PMINUB PMINUB_MMX
53
+%define LOOP_ALIGN
54
+INIT_MMX
55
+AC3_EXPONENT_MIN mmx
56
+%ifdef HAVE_MMX2
57
+%define PMINUB PMINUB_MMXEXT
58
+%define LOOP_ALIGN ALIGN 16
59
+AC3_EXPONENT_MIN mmxext
60
+%endif
61
+%ifdef HAVE_SSE
62
+INIT_XMM
63
+AC3_EXPONENT_MIN sse2
64
+%endif
65
+%undef PMINUB
66
+%undef LOOP_ALIGN
0 67
new file mode 100644
... ...
@@ -0,0 +1,45 @@
0
+/*
1
+ * x86-optimized AC-3 DSP utils
2
+ * Copyright (c) 2011 Justin Ruggles
3
+ *
4
+ * This file is part of FFmpeg.
5
+ *
6
+ * FFmpeg is free software; you can redistribute it and/or
7
+ * modify it under the terms of the GNU Lesser General Public
8
+ * License as published by the Free Software Foundation; either
9
+ * version 2.1 of the License, or (at your option) any later version.
10
+ *
11
+ * FFmpeg is distributed in the hope that it will be useful,
12
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
+ * Lesser General Public License for more details.
15
+ *
16
+ * You should have received a copy of the GNU Lesser General Public
17
+ * License along with FFmpeg; if not, write to the Free Software
18
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
+ */
20
+
21
+#include "libavutil/x86_cpu.h"
22
+#include "dsputil_mmx.h"
23
+#include "libavcodec/ac3dsp.h"
24
+
25
+extern void ff_ac3_exponent_min_mmx   (uint8_t *exp, int num_reuse_blocks, int nb_coefs);
26
+extern void ff_ac3_exponent_min_mmxext(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
27
+extern void ff_ac3_exponent_min_sse2  (uint8_t *exp, int num_reuse_blocks, int nb_coefs);
28
+
29
+av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c)
30
+{
31
+    int mm_flags = av_get_cpu_flags();
32
+
33
+#if HAVE_YASM
34
+    if (mm_flags & AV_CPU_FLAG_MMX) {
35
+        c->ac3_exponent_min = ff_ac3_exponent_min_mmx;
36
+    }
37
+    if (mm_flags & AV_CPU_FLAG_MMX2 && HAVE_MMX2) {
38
+        c->ac3_exponent_min = ff_ac3_exponent_min_mmxext;
39
+    }
40
+    if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE) {
41
+        c->ac3_exponent_min = ff_ac3_exponent_min_sse2;
42
+    }
43
+#endif
44
+}
... ...
@@ -434,3 +434,13 @@
434 434
     movh     [%7], %3
435 435
     movh  [%7+%8], %4
436 436
 %endmacro
437
+
438
+%macro PMINUB_MMX 3 ; dst, src, tmp
439
+    mova     %3, %1
440
+    psubusb  %3, %2
441
+    psubb    %1, %3
442
+%endmacro
443
+
444
+%macro PMINUB_MMXEXT 3 ; dst, src, ignored
445
+    pminub   %1, %2
446
+%endmacro