Browse code

Merge remote-tracking branch 'qatar/master'

* qatar/master:
mpegvideo: drop unnecessary arguments to hpel_motion()
mpegvideo: drop 'inline' from some functions
nellymoserdec: drop support for s16 output.
bmpdec: only initialize palette for pal8.
build: Properly remove object files while cleaning
flacdsp: arm optimised lpc filter
compat/vsnprintf: return number of bytes required on truncation.

Merged-by: Michael Niedermayer <michaelni@gmx.at>

Michael Niedermayer authored on 2012/09/16 21:48:45
Showing 10 changed files
... ...
@@ -124,4 +124,7 @@ CLEANSUFFIXES     = *.d *.o *~ *.h.c *.map *.ver *.ho *.gcno *.gcda
124 124
 DISTCLEANSUFFIXES = *.pc
125 125
 LIBSUFFIXES       = *.a *.lib *.so *.so.* *.dylib *.dll *.def *.dll.a
126 126
 
127
+clean::  # delete every file named in OBJS plus its .o -> .d counterpart
128
+	$(RM) $(OBJS) $(OBJS:.o=.d)
129
+
127 130
 -include $(wildcard $(OBJS:.o=.d) $(HOSTOBJS:.o=.d) $(TESTOBJS:.o=.d) $(HOBJS:.o=.d))
... ...
@@ -8,6 +8,9 @@ OBJS-$(CONFIG_DCA_DECODER)             += arm/dcadsp_init_arm.o         \
8 8
 
9 9
 ARMV6-OBJS-$(CONFIG_AC3DSP)            += arm/ac3dsp_armv6.o
10 10
 
11
+OBJS-$(CONFIG_FLAC_DECODER)            += arm/flacdsp_init_arm.o        \
12
+                                          arm/flacdsp_arm.o             \
13
+
11 14
 OBJS-$(CONFIG_MPEGAUDIODSP)            += arm/mpegaudiodsp_init_arm.o
12 15
 ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP)      += arm/mpegaudiodsp_fixed_armv6.o
13 16
 
14 17
new file mode 100644
... ...
@@ -0,0 +1,146 @@
0
+/*
1
+ * Copyright (c) 2012 Mans Rullgard <mans@mansr.com>
2
+ *
3
+ * This file is part of FFmpeg
4
+ *
5
+ * FFmpeg is free software; you can redistribute it and/or
6
+ * modify it under the terms of the GNU Lesser General Public
7
+ * License as published by the Free Software Foundation; either
8
+ * version 2.1 of the License, or (at your option) any later version.
9
+ *
10
+ * FFmpeg is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
+ * Lesser General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Lesser General Public
16
+ * License along with FFmpeg; if not, write to the Free Software
17
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ */
19
+
20
+#include "libavutil/arm/asm.S"
21
+/*
+ * Single-coefficient case of the LPC filter; ff_flac_lpc_16_arm branches
+ * here when its order argument (r2) is below 2.
+ * Register roles as used below: r0 walks the sample array, r1 points at the
+ * coefficient, r3 is the right-shift amount and the word at [sp] is the
+ * count (presumably samples/coeffs/qlevel/len of the C prototype under the
+ * standard ARM calling convention -- TODO confirm).
+ * Samples are updated in place, s[i] += (s[i-1] * coeff) asr r3, two per
+ * loop iteration with a one-sample tail at label 2.
+ */
22
+function flac_lpc_16_1_arm
23
+        ldr             r12, [sp]              /* r12 = count, read before the push moves sp */
24
+        push            {r4, lr}
25
+        ldr             r1,  [r1]              /* r1 = the single coefficient */
26
+        subs            r12, r12, #2           /* flags are consumed by beq/poplt below */
27
+        ldr             lr,  [r0], #4          /* lr = first sample, r0 -> second sample */
28
+        beq             2f                     /* count == 2: exactly one sample to update */
29
+        it              lt
30
+        poplt           {r4, pc}               /* count < 2: nothing to update */
31
+1:
32
+        mul             r4,  lr,  r1           /* r4 = previous sample * coeff */
33
+        ldm             r0,  {r2, lr}          /* next two samples; r2 is reused as scratch */
34
+        add_sh          r2,  r2,  r4,  asr r3
35
+        mul             r4,  r2,  r1           /* the updated sample feeds the next prediction */
36
+        subs            r12, r12, #2
37
+        add_sh          lr,  lr,  r4,  asr r3
38
+        stm             r0!, {r2, lr}          /* store both results and advance r0 */
39
+        bgt             1b
40
+        it              lt
41
+        poplt           {r4, pc}               /* counter went negative: all done */
42
+2:                                             /* counter hit zero: one sample left */
43
+        mul             r4,  lr,  r1
44
+        ldr             r2,  [r0]
45
+        add_sh          r2,  r2,  r4,  asr r3
46
+        str             r2,  [r0]
47
+        pop             {r4, pc}
48
+endfunc
49
+/*
+ * Two-coefficient case of the LPC filter; ff_flac_lpc_16_arm branches here
+ * when its order argument (r2) equals 2.
+ * Returns at once when (word at [sp]) - r2 <= 0. Otherwise samples are
+ * updated in place, s[i] += (s[i-2] * c0 + s[i-1] * c1) asr r3, where c0/c1
+ * are the first two words at r1; two samples per loop iteration with a
+ * one-sample tail at label 2.
+ */
50
+function flac_lpc_16_2_arm
51
+        ldr             r12, [sp]
52
+        subs            r12, r12, r2           /* r12 = count - order */
53
+        it              le
54
+        bxle            lr                     /* nothing to predict; no registers saved yet */
55
+
56
+        push            {r4-r9, lr}
57
+        ldm             r0!, {r6, r7}          /* r6, r7 = first two samples (history) */
58
+        ldm             r1,  {r8, r9}          /* r8, r9 = c0, c1 */
59
+        subs            r12, r12, #1
60
+        beq             2f                     /* exactly one sample to update */
61
+1:
62
+        mul             r4,  r6,  r8           /* r4 accumulates the first output */
63
+        mul             r5,  r7,  r8           /* r5 accumulates the second output */
64
+        mla             r4,  r7,  r9,  r4
65
+        ldm             r0,  {r6, r7}          /* history registers now hold the next two samples */
66
+        add_sh          r6,  r6,  r4,  asr r3
67
+        mla             r5,  r6,  r9,  r5      /* second output uses the just-updated first one */
68
+        add_sh          r7,  r7,  r5,  asr r3
69
+        stm             r0!, {r6, r7}
70
+        subs            r12, r12, #2
71
+        bgt             1b
72
+        it              lt
73
+        poplt           {r4-r9, pc}            /* counter went negative: all done */
74
+2:                                             /* counter hit zero: one sample left */
75
+        mul             r4,  r6,  r8
76
+        mla             r4,  r7,  r9,  r4
77
+        ldr             r5,  [r0]
78
+        add_sh          r5,  r5,  r4,  asr r3
79
+        str             r5,  [r0]
80
+        pop             {r4-r9, pc}
81
+endfunc
82
+/*
+ * Exported LPC entry point. Register roles as used below: r0 = sample
+ * pointer, r1 = coefficient pointer, r2 = order, r3 = right-shift amount,
+ * word at [sp] = count (presumably samples/coeffs/order/qlevel/len of the C
+ * prototype under the standard ARM calling convention -- TODO confirm).
+ * Orders below 2 and equal to 2 are handed to the specialised routines by
+ * plain branches, so those return straight to the caller.
+ * General path: each pass of loop 1 produces two outputs. r4 and r5 hold
+ * the two running sums of sample * coefficient products; the second sum
+ * also takes the freshly updated first output times the last coefficient.
+ * Label 3 handles a single remaining output.
+ */
83
+function ff_flac_lpc_16_arm, export=1
84
+        cmp             r2,  #2
85
+        blt             flac_lpc_16_1_arm
86
+        beq             flac_lpc_16_2_arm
87
+
88
+        ldr             r12, [sp]
89
+        subs            r12, r12, r2           /* r12 = count - order */
90
+        it              le
91
+        bxle            lr                     /* nothing to predict; no registers saved yet */
92
+
93
+        push            {r4-r9, lr}
94
+
95
+        subs            r12, r12, #1
96
+        beq             3f                     /* exactly one output to produce */
97
+1:
98
+        sub             lr,  r2,  #2           /* inner counter, stepped down by 2 */
99
+        mov             r4,  #0
100
+        mov             r5,  #0
101
+
102
+        ldr             r7,  [r0], #4
103
+        ldr             r9,  [r1], #4
104
+2:
105
+        mla             r4,  r7,  r9,  r4
106
+        ldm             r0!, {r6, r7}
107
+        mla             r5,  r6,  r9,  r5
108
+        ldm             r1!, {r8, r9}
109
+        mla             r4,  r6,  r8,  r4
110
+        subs            lr,  lr,  #2
111
+        mla             r5,  r7,  r8,  r5
112
+        bgt             2b
113
+        blt             6f                     /* counter went negative (odd order): skip the extra tap */
114
+/* counter hit exactly zero (even order): fold in one more sample/coefficient pair */
115
+        mla             r4,  r7,  r9,  r4
116
+        ldr             r7,  [r0], #4
117
+        mla             r5,  r7,  r9,  r5
118
+        ldr             r9,  [r1], #4
119
+6:
120
+        mla             r4,  r7,  r9,  r4      /* last tap of the first output */
121
+        ldm             r0,  {r6, r7}          /* the two samples to be updated */
122
+        add_sh          r6,  r6,  r4,  asr r3
123
+        mla             r5,  r6,  r9,  r5      /* second output uses the just-updated first one */
124
+        add_sh          r7,  r7,  r5,  asr r3
125
+        stm             r0!, {r6, r7}
126
+        sub             r0,  r0,  r2,  lsl #2  /* rewind by order words: net advance is two samples */
127
+        sub             r1,  r1,  r2,  lsl #2  /* back to the first coefficient */
128
+
129
+        subs            r12, r12, #2
130
+        bgt             1b
131
+        it              lt
132
+        poplt           {r4-r9, pc}            /* counter went negative: all done */
133
+3:                                             /* one output left */
134
+        mov             r4,  #0
135
+4:
136
+        ldr             r5,  [r1], #4
137
+        ldr             r6,  [r0], #4
138
+        mla             r4,  r5,  r6,  r4
139
+        subs            r2,  r2,  #1           /* order is consumed as the loop counter here */
140
+        bgt             4b
141
+        ldr             r5,  [r0]
142
+        add_sh          r5,  r5,  r4,  asr r3
143
+        str             r5,  [r0]
144
+        pop             {r4-r9, pc}
145
+endfunc
0 146
new file mode 100644
... ...
@@ -0,0 +1,32 @@
0
+/*
1
+ * Copyright (c) 2012 Mans Rullgard <mans@mansr.com>
2
+ *
3
+ * This file is part of FFmpeg.
4
+ *
5
+ * FFmpeg is free software; you can redistribute it and/or
6
+ * modify it under the terms of the GNU Lesser General Public
7
+ * License as published by the Free Software Foundation; either
8
+ * version 2.1 of the License, or (at your option) any later version.
9
+ *
10
+ * FFmpeg is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
+ * Lesser General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Lesser General Public
16
+ * License along with FFmpeg; if not, write to the Free Software
17
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ */
19
+
20
+#include "libavcodec/flacdsp.h"
21
+#include "config.h"
22
+/* LPC routine exported by the ARM assembly file added in this change. */
23
+void ff_flac_lpc_16_arm(int32_t *samples, const int coeffs[32], int order,
24
+                        int qlevel, int len);
25
+/*
+ * Install the ARM LPC routine into c->lpc when bps is 16 or less; for larger
+ * bps the context is left untouched. The fmt argument is not used here.
+ */
26
+av_cold void ff_flacdsp_init_arm(FLACDSPContext *c, enum AVSampleFormat fmt,
27
+                                 int bps)
28
+{
29
+    if (bps <= 16)
30
+        c->lpc = ff_flac_lpc_16_arm;
31
+}
... ...
@@ -232,9 +232,6 @@ static int bmp_decode_frame(AVCodecContext *avctx,
232 232
     if(comp == BMP_RLE4 || comp == BMP_RLE8)
233 233
         memset(p->data[0], 0, avctx->height * p->linesize[0]);
234 234
 
235
-    if(depth == 4 || depth == 8)
236
-        memset(p->data[1], 0, 1024);
237
-
238 235
     if(height > 0){
239 236
         ptr = p->data[0] + (avctx->height - 1) * p->linesize[0];
240 237
         linesize = -p->linesize[0];
... ...
@@ -245,6 +242,9 @@ static int bmp_decode_frame(AVCodecContext *avctx,
245 245
 
246 246
     if(avctx->pix_fmt == PIX_FMT_PAL8){
247 247
         int colors = 1 << depth;
248
+
249
+        memset(p->data[1], 0, 1024);
250
+
248 251
         if(ihsize >= 36){
249 252
             int t;
250 253
             buf = buf0 + 46;
... ...
@@ -21,6 +21,7 @@
21 21
 #include "libavutil/attributes.h"
22 22
 #include "libavutil/samplefmt.h"
23 23
 #include "flacdsp.h"
24
+#include "config.h"
24 25
 
25 26
 #define SAMPLE_SIZE 16
26 27
 #define PLANAR 0
... ...
@@ -119,4 +120,7 @@ av_cold void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt,
119 119
         c->decorrelate[3] = flac_decorrelate_ms_c_16p;
120 120
         break;
121 121
     }
122
+
123
+    if (ARCH_ARM)
124
+        ff_flacdsp_init_arm(c, fmt, bps);
122 125
 }
... ...
@@ -30,5 +30,6 @@ typedef struct FLACDSPContext {
30 30
 } FLACDSPContext;
31 31
 
32 32
 void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt, int bps);
33
+void ff_flacdsp_init_arm(FLACDSPContext *c, enum AVSampleFormat fmt, int bps);
33 34
 
34 35
 #endif /* AVCODEC_FLACDSP_H */
... ...
@@ -30,9 +30,9 @@
30 30
 #include "msmpeg4.h"
31 31
 #include <limits.h>
32 32
 
33
-static inline void gmc1_motion(MpegEncContext *s,
34
-                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
35
-                               uint8_t **ref_picture)
33
+static void gmc1_motion(MpegEncContext *s,
34
+                        uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
35
+                        uint8_t **ref_picture)
36 36
 {
37 37
     uint8_t *ptr;
38 38
     int offset, src_x, src_y, linesize, uvlinesize;
... ...
@@ -116,9 +116,9 @@ static inline void gmc1_motion(MpegEncContext *s,
116 116
     return;
117 117
 }
118 118
 
119
-static inline void gmc_motion(MpegEncContext *s,
120
-                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
121
-                               uint8_t **ref_picture)
119
+static void gmc_motion(MpegEncContext *s,
120
+                       uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
121
+                       uint8_t **ref_picture)
122 122
 {
123 123
     uint8_t *ptr;
124 124
     int linesize, uvlinesize;
... ...
@@ -174,11 +174,8 @@ static inline void gmc_motion(MpegEncContext *s,
174 174
 
175 175
 static inline int hpel_motion(MpegEncContext *s,
176 176
                                   uint8_t *dest, uint8_t *src,
177
-                                  int field_based, int field_select,
178 177
                                   int src_x, int src_y,
179
-                                  int width, int height, int stride,
180
-                                  int h_edge_pos, int v_edge_pos,
181
-                                  int w, int h, op_pixels_func *pix_op,
178
+                                  op_pixels_func *pix_op,
182 179
                                   int motion_x, int motion_y)
183 180
 {
184 181
     int dxy;
... ...
@@ -189,26 +186,24 @@ static inline int hpel_motion(MpegEncContext *s,
189 189
     src_y += motion_y >> 1;
190 190
 
191 191
     /* WARNING: do not forget half pels */
192
-    src_x = av_clip(src_x, -16, width); //FIXME unneeded for emu?
193
-    if (src_x == width)
192
+    src_x = av_clip(src_x, -16, s->width); //FIXME unneeded for emu?
193
+    if (src_x == s->width)
194 194
         dxy &= ~1;
195
-    src_y = av_clip(src_y, -16, height);
196
-    if (src_y == height)
195
+    src_y = av_clip(src_y, -16, s->height);
196
+    if (src_y == s->height)
197 197
         dxy &= ~2;
198
-    src += src_y * stride + src_x;
198
+    src += src_y * s->linesize + src_x;
199 199
 
200 200
     if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){
201
-        if(   (unsigned)src_x > FFMAX(h_edge_pos - (motion_x&1) - w, 0)
202
-           || (unsigned)src_y > FFMAX(v_edge_pos - (motion_y&1) - h, 0)){
203
-            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
204
-                             src_x, src_y<<field_based, h_edge_pos, s->v_edge_pos);
201
+        if(   (unsigned)src_x > FFMAX(s->h_edge_pos - (motion_x&1) - 8, 0)
202
+           || (unsigned)src_y > FFMAX(s->v_edge_pos - (motion_y&1) - 8, 0)){
203
+            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, 9, 9,
204
+                             src_x, src_y, s->h_edge_pos, s->v_edge_pos);
205 205
             src= s->edge_emu_buffer;
206 206
             emu=1;
207 207
         }
208 208
     }
209
-    if(field_select)
210
-        src += s->linesize;
211
-    pix_op[dxy](dest, src, stride, h);
209
+    pix_op[dxy](dest, src, s->linesize, 8);
212 210
     return emu;
213 211
 }
214 212
 
... ...
@@ -447,11 +442,9 @@ static inline void obmc_motion(MpegEncContext *s,
447 447
             ptr[i]= ptr[MID];
448 448
         }else{
449 449
             ptr[i]= s->obmc_scratchpad + 8*(i&1) + s->linesize*8*(i>>1);
450
-            hpel_motion(s, ptr[i], src, 0, 0,
450
+            hpel_motion(s, ptr[i], src,
451 451
                         src_x, src_y,
452
-                        s->width, s->height, s->linesize,
453
-                        s->h_edge_pos, s->v_edge_pos,
454
-                        8, 8, pix_op,
452
+                        pix_op,
455 453
                         mv[i][0], mv[i][1]);
456 454
         }
457 455
     }
... ...
@@ -554,11 +547,12 @@ static inline void qpel_motion(MpegEncContext *s,
554 554
 /**
555 555
  * h263 chroma 4mv motion compensation.
556 556
  */
557
-static inline void chroma_4mv_motion(MpegEncContext *s,
558
-                                     uint8_t *dest_cb, uint8_t *dest_cr,
559
-                                     uint8_t **ref_picture,
560
-                                     op_pixels_func *pix_op,
561
-                                     int mx, int my){
557
+static void chroma_4mv_motion(MpegEncContext *s,
558
+                              uint8_t *dest_cb, uint8_t *dest_cr,
559
+                              uint8_t **ref_picture,
560
+                              op_pixels_func *pix_op,
561
+                              int mx, int my)
562
+{
562 563
     int dxy, emu=0, src_x, src_y, offset;
563 564
     uint8_t *ptr;
564 565
 
... ...
@@ -773,11 +767,9 @@ static av_always_inline void MPV_motion_internal(MpegEncContext *s,
773 773
         }else{
774 774
             for(i=0;i<4;i++) {
775 775
                 hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
776
-                            ref_picture[0], 0, 0,
776
+                            ref_picture[0],
777 777
                             mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
778
-                            s->width, s->height, s->linesize,
779
-                            s->h_edge_pos, s->v_edge_pos,
780
-                            8, 8, pix_op[1],
778
+                            pix_op[1],
781 779
                             s->mv[dir][i][0], s->mv[dir][i][1]);
782 780
 
783 781
                 mx += s->mv[dir][i][0];
... ...
@@ -48,13 +48,11 @@
48 48
 typedef struct NellyMoserDecodeContext {
49 49
     AVCodecContext* avctx;
50 50
     AVFrame         frame;
51
-    float          *float_buf;
52 51
     AVLFG           random_state;
53 52
     GetBitContext   gb;
54 53
     float           scale_bias;
55 54
     DSPContext      dsp;
56 55
     FFTContext      imdct_ctx;
57
-    FmtConvertContext fmt_conv;
58 56
     DECLARE_ALIGNED(32, float, imdct_buf)[2][NELLY_BUF_LEN];
59 57
     float          *imdct_out;
60 58
     float          *imdct_prev;
... ...
@@ -124,19 +122,8 @@ static av_cold int decode_init(AVCodecContext * avctx) {
124 124
 
125 125
     ff_dsputil_init(&s->dsp, avctx);
126 126
 
127
-    if (avctx->request_sample_fmt == AV_SAMPLE_FMT_FLT) {
128
-        s->scale_bias = 1.0/(32768*8);
129
-        avctx->sample_fmt = AV_SAMPLE_FMT_FLT;
130
-    } else {
131
-        s->scale_bias = 1.0/(1*8);
132
-        avctx->sample_fmt = AV_SAMPLE_FMT_S16;
133
-        ff_fmt_convert_init(&s->fmt_conv, avctx);
134
-        s->float_buf = av_mallocz(NELLY_SAMPLES * sizeof(*s->float_buf));
135
-        if (!s->float_buf) {
136
-            av_log(avctx, AV_LOG_ERROR, "error allocating float buffer\n");
137
-            return AVERROR(ENOMEM);
138
-        }
139
-    }
127
+    s->scale_bias = 1.0/(32768*8);
128
+    avctx->sample_fmt = AV_SAMPLE_FMT_FLT;
140 129
 
141 130
     /* Generate overlap window */
142 131
     if (!ff_sine_128[127])
... ...
@@ -158,7 +145,6 @@ static int decode_tag(AVCodecContext *avctx, void *data,
158 158
     int buf_size = avpkt->size;
159 159
     NellyMoserDecodeContext *s = avctx->priv_data;
160 160
     int blocks, i, ret;
161
-    int16_t *samples_s16;
162 161
     float   *samples_flt;
163 162
 
164 163
     blocks     = buf_size / NELLY_BLOCK_LEN;
... ...
@@ -188,18 +174,11 @@ static int decode_tag(AVCodecContext *avctx, void *data,
188 188
         av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
189 189
         return ret;
190 190
     }
191
-    samples_s16 = (int16_t *)s->frame.data[0];
192 191
     samples_flt = (float   *)s->frame.data[0];
193 192
 
194 193
     for (i=0 ; i<blocks ; i++) {
195
-        if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT) {
196
-            nelly_decode_block(s, buf, samples_flt);
197
-            samples_flt += NELLY_SAMPLES;
198
-        } else {
199
-            nelly_decode_block(s, buf, s->float_buf);
200
-            s->fmt_conv.float_to_int16(samples_s16, s->float_buf, NELLY_SAMPLES);
201
-            samples_s16 += NELLY_SAMPLES;
202
-        }
194
+        nelly_decode_block(s, buf, samples_flt);
195
+        samples_flt += NELLY_SAMPLES;
203 196
         buf += NELLY_BLOCK_LEN;
204 197
     }
205 198
 
... ...
@@ -212,7 +191,6 @@ static int decode_tag(AVCodecContext *avctx, void *data,
212 212
 static av_cold int decode_end(AVCodecContext * avctx) {
213 213
     NellyMoserDecodeContext *s = avctx->priv_data;
214 214
 
215
-    av_freep(&s->float_buf);
216 215
     ff_mdct_end(&s->imdct_ctx);
217 216
 
218 217
     return 0;
... ...
@@ -229,6 +207,5 @@ AVCodec ff_nellymoser_decoder = {
229 229
     .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_PARAM_CHANGE,
230 230
     .long_name      = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
231 231
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLT,
232
-                                                      AV_SAMPLE_FMT_S16,
233 232
                                                       AV_SAMPLE_FMT_NONE },
234 233
 };
... ...
@@ -186,6 +186,12 @@ ELF     .size   \name, . - \name
186 186
 #endif
187 187
 .endm
188 188
 
189
+.macro  add_sh          rd,  rn,  rm,  sh:vararg    /* rd = rn + (rm shifted by sh, e.g. "asr r3") */
190
+A       add             \rd, \rn, \rm, \sh          /* A/T prefixes are defined outside this hunk; presumably ARM-only / Thumb-only lines -- confirm */
191
+T       mov             \rm, \rm, \sh               /* this variant shifts rm in place, so rm is overwritten */
192
+T       add             \rd, \rn, \rm
193
+.endm
194
+
189 195
 .macro  ldr_pre         rt,  rn,  rm:vararg
190 196
 A       ldr             \rt, [\rn, \rm]!
191 197
 T       add             \rn, \rn, \rm