Signed-off-by: Mans Rullgard <mans@mansr.com>
| ... | ... |
@@ -68,6 +68,8 @@ NEON-OBJS-$(CONFIG_RV30_DECODER) += arm/rv34dsp_init_neon.o \ |
| 68 | 68 |
|
| 69 | 69 |
NEON-OBJS-$(CONFIG_RV40_DECODER) += arm/rv34dsp_init_neon.o \ |
| 70 | 70 |
arm/rv34dsp_neon.o \ |
| 71 |
+ arm/rv40dsp_init_neon.o \ |
|
| 72 |
+ arm/h264cmc_neon.o \ |
|
| 71 | 73 |
|
| 72 | 74 |
NEON-OBJS-$(CONFIG_VP3_DECODER) += arm/vp3dsp_neon.o |
| 73 | 75 |
|
| ... | ... |
@@ -21,8 +21,8 @@ |
| 21 | 21 |
#include "asm.S" |
| 22 | 22 |
|
| 23 | 23 |
/* chroma_mc8(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */ |
| 24 |
-.macro h264_chroma_mc8 type |
|
| 25 |
-function ff_\type\()_h264_chroma_mc8_neon, export=1 |
|
| 24 |
+.macro h264_chroma_mc8 type, codec=h264 |
|
| 25 |
+function ff_\type\()_\codec\()_chroma_mc8_neon, export=1 |
|
| 26 | 26 |
push {r4-r7, lr}
|
| 27 | 27 |
ldrd r4, [sp, #20] |
| 28 | 28 |
.ifc \type,avg |
| ... | ... |
@@ -31,6 +31,15 @@ function ff_\type\()_h264_chroma_mc8_neon, export=1 |
| 31 | 31 |
pld [r1] |
| 32 | 32 |
pld [r1, r2] |
| 33 | 33 |
|
| 34 |
+ .ifc \codec,rv40 |
|
| 35 |
+ movrel r6, rv40bias |
|
| 36 |
+ lsr r7, r5, #1 |
|
| 37 |
+ add r6, r6, r7, lsl #3 |
|
| 38 |
+ lsr r7, r4, #1 |
|
| 39 |
+ add r6, r6, r7, lsl #1 |
|
| 40 |
+ vld1.16 {d22[],d23[]}, [r6,:16]
|
|
| 41 |
+ .endif |
|
| 42 |
+ |
|
| 34 | 43 |
A muls r7, r4, r5 |
| 35 | 44 |
T mul r7, r4, r5 |
| 36 | 45 |
T cmp r7, #0 |
| ... | ... |
@@ -67,10 +76,17 @@ T cmp r7, #0 |
| 67 | 67 |
vmlal.u8 q9, d7, d1 |
| 68 | 68 |
vmlal.u8 q9, d4, d2 |
| 69 | 69 |
vmlal.u8 q9, d5, d3 |
| 70 |
- vrshrn.u16 d16, q8, #6 |
|
| 71 | 70 |
vld1.8 {d6, d7}, [r5], r4
|
| 72 | 71 |
pld [r1] |
| 72 |
+ .ifc \codec,h264 |
|
| 73 |
+ vrshrn.u16 d16, q8, #6 |
|
| 73 | 74 |
vrshrn.u16 d17, q9, #6 |
| 75 |
+ .else |
|
| 76 |
+ vadd.u16 q8, q8, q11 |
|
| 77 |
+ vadd.u16 q9, q9, q11 |
|
| 78 |
+ vshrn.u16 d16, q8, #6 |
|
| 79 |
+ vshrn.u16 d17, q9, #6 |
|
| 80 |
+ .endif |
|
| 74 | 81 |
.ifc \type,avg |
| 75 | 82 |
vld1.8 {d20}, [lr,:64], r2
|
| 76 | 83 |
vld1.8 {d21}, [lr,:64], r2
|
| ... | ... |
@@ -102,8 +118,15 @@ T cmp r7, #0 |
| 102 | 102 |
vmull.u8 q9, d6, d0 |
| 103 | 103 |
vmlal.u8 q9, d4, d1 |
| 104 | 104 |
vld1.8 {d6}, [r5], r4
|
| 105 |
+ .ifc \codec,h264 |
|
| 105 | 106 |
vrshrn.u16 d16, q8, #6 |
| 106 | 107 |
vrshrn.u16 d17, q9, #6 |
| 108 |
+ .else |
|
| 109 |
+ vadd.u16 q8, q8, q11 |
|
| 110 |
+ vadd.u16 q9, q9, q11 |
|
| 111 |
+ vshrn.u16 d16, q8, #6 |
|
| 112 |
+ vshrn.u16 d17, q9, #6 |
|
| 113 |
+ .endif |
|
| 107 | 114 |
.ifc \type,avg |
| 108 | 115 |
vld1.8 {d20}, [lr,:64], r2
|
| 109 | 116 |
vld1.8 {d21}, [lr,:64], r2
|
| ... | ... |
@@ -131,8 +154,15 @@ T cmp r7, #0 |
| 131 | 131 |
vmlal.u8 q9, d7, d1 |
| 132 | 132 |
pld [r1] |
| 133 | 133 |
vext.8 d5, d4, d5, #1 |
| 134 |
+ .ifc \codec,h264 |
|
| 134 | 135 |
vrshrn.u16 d16, q8, #6 |
| 135 | 136 |
vrshrn.u16 d17, q9, #6 |
| 137 |
+ .else |
|
| 138 |
+ vadd.u16 q8, q8, q11 |
|
| 139 |
+ vadd.u16 q9, q9, q11 |
|
| 140 |
+ vshrn.u16 d16, q8, #6 |
|
| 141 |
+ vshrn.u16 d17, q9, #6 |
|
| 142 |
+ .endif |
|
| 136 | 143 |
.ifc \type,avg |
| 137 | 144 |
vld1.8 {d20}, [lr,:64], r2
|
| 138 | 145 |
vld1.8 {d21}, [lr,:64], r2
|
| ... | ... |
@@ -149,8 +179,8 @@ endfunc |
| 149 | 149 |
.endm |
| 150 | 150 |
|
| 151 | 151 |
/* chroma_mc4(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */ |
| 152 |
-.macro h264_chroma_mc4 type |
|
| 153 |
-function ff_\type\()_h264_chroma_mc4_neon, export=1 |
|
| 152 |
+.macro h264_chroma_mc4 type, codec=h264 |
|
| 153 |
+function ff_\type\()_\codec\()_chroma_mc4_neon, export=1 |
|
| 154 | 154 |
push {r4-r7, lr}
|
| 155 | 155 |
ldrd r4, [sp, #20] |
| 156 | 156 |
.ifc \type,avg |
| ... | ... |
@@ -159,6 +189,15 @@ function ff_\type\()_h264_chroma_mc4_neon, export=1 |
| 159 | 159 |
pld [r1] |
| 160 | 160 |
pld [r1, r2] |
| 161 | 161 |
|
| 162 |
+ .ifc \codec,rv40 |
|
| 163 |
+ movrel r6, rv40bias |
|
| 164 |
+ lsr r7, r5, #1 |
|
| 165 |
+ add r6, r6, r7, lsl #3 |
|
| 166 |
+ lsr r7, r4, #1 |
|
| 167 |
+ add r6, r6, r7, lsl #1 |
|
| 168 |
+ vld1.16 {d22[],d23[]}, [r6,:16]
|
|
| 169 |
+ .endif |
|
| 170 |
+ |
|
| 162 | 171 |
A muls r7, r4, r5 |
| 163 | 172 |
T mul r7, r4, r5 |
| 164 | 173 |
T cmp r7, #0 |
| ... | ... |
@@ -199,7 +238,12 @@ T cmp r7, #0 |
| 199 | 199 |
vld1.8 {d6}, [r5], r4
|
| 200 | 200 |
vadd.i16 d16, d16, d17 |
| 201 | 201 |
vadd.i16 d17, d18, d19 |
| 202 |
+ .ifc \codec,h264 |
|
| 202 | 203 |
vrshrn.u16 d16, q8, #6 |
| 204 |
+ .else |
|
| 205 |
+ vadd.u16 q8, q8, q11 |
|
| 206 |
+ vshrn.u16 d16, q8, #6 |
|
| 207 |
+ .endif |
|
| 203 | 208 |
subs r3, r3, #2 |
| 204 | 209 |
pld [r1] |
| 205 | 210 |
.ifc \type,avg |
| ... | ... |
@@ -236,7 +280,12 @@ T cmp r7, #0 |
| 236 | 236 |
vld1.32 {d4[1]}, [r5], r4
|
| 237 | 237 |
vadd.i16 d16, d16, d17 |
| 238 | 238 |
vadd.i16 d17, d18, d19 |
| 239 |
+ .ifc \codec,h264 |
|
| 239 | 240 |
vrshrn.u16 d16, q8, #6 |
| 241 |
+ .else |
|
| 242 |
+ vadd.u16 q8, q8, q11 |
|
| 243 |
+ vshrn.u16 d16, q8, #6 |
|
| 244 |
+ .endif |
|
| 240 | 245 |
.ifc \type,avg |
| 241 | 246 |
vld1.32 {d20[0]}, [lr,:32], r2
|
| 242 | 247 |
vld1.32 {d20[1]}, [lr,:32], r2
|
| ... | ... |
@@ -266,7 +315,12 @@ T cmp r7, #0 |
| 266 | 266 |
vadd.i16 d16, d16, d17 |
| 267 | 267 |
vadd.i16 d17, d18, d19 |
| 268 | 268 |
pld [r1] |
| 269 |
+ .ifc \codec,h264 |
|
| 269 | 270 |
vrshrn.u16 d16, q8, #6 |
| 271 |
+ .else |
|
| 272 |
+ vadd.u16 q8, q8, q11 |
|
| 273 |
+ vshrn.u16 d16, q8, #6 |
|
| 274 |
+ .endif |
|
| 270 | 275 |
.ifc \type,avg |
| 271 | 276 |
vld1.32 {d20[0]}, [lr,:32], r2
|
| 272 | 277 |
vld1.32 {d20[1]}, [lr,:32], r2
|
| ... | ... |
@@ -352,9 +406,25 @@ function ff_\type\()_h264_chroma_mc2_neon, export=1 |
| 352 | 352 |
endfunc |
| 353 | 353 |
.endm |
| 354 | 354 |
|
| 355 |
+#if CONFIG_H264_DECODER /* H.264 variants: mc8/mc4 rely on the macros' default codec=h264 argument */ |
|
| 355 | 356 |
h264_chroma_mc8 put |
| 356 | 357 |
h264_chroma_mc8 avg |
| 357 | 358 |
h264_chroma_mc4 put |
| 358 | 359 |
h264_chroma_mc4 avg |
| 359 | 360 |
h264_chroma_mc2 put |
| 360 | 361 |
h264_chroma_mc2 avg |
| 362 |
+#endif /* CONFIG_H264_DECODER */ |
|
| 363 |
+/* rv40bias: 4x4 table of 16-bit biases, read as rv40bias[y >> 1][x >> 1] (8 bytes per row) by the rv40 variants of the chroma macros, which add it to the 16-bit sums before a truncating >> 6 instead of the rounding narrowing shift used for h264. */ |
|
| 364 |
+#if CONFIG_RV40_DECODER |
|
| 365 |
+const rv40bias |
|
| 366 |
+ .short 0, 16, 32, 16 |
|
| 367 |
+ .short 32, 28, 32, 28 |
|
| 368 |
+ .short 0, 32, 16, 32 |
|
| 369 |
+ .short 32, 28, 32, 28 |
|
| 370 |
+endconst |
|
| 371 |
+/* Emit ff_{put,avg}_rv40_chroma_mc{8,4}_neon by passing codec=rv40 to the shared macros. */ |
|
| 372 |
+ h264_chroma_mc8 put, rv40 |
|
| 373 |
+ h264_chroma_mc8 avg, rv40 |
|
| 374 |
+ h264_chroma_mc4 put, rv40 |
|
| 375 |
+ h264_chroma_mc4 avg, rv40 |
|
| 376 |
+#endif /* CONFIG_RV40_DECODER */ |
| 361 | 377 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,38 @@ |
| 0 |
+/* |
|
| 1 |
+ * Copyright (c) 2011 Janne Grunau <janne-libav@jannau.net> |
|
| 2 |
+ * |
|
| 3 |
+ * This file is part of Libav. |
|
| 4 |
+ * |
|
| 5 |
+ * Libav is free software; you can redistribute it and/or |
|
| 6 |
+ * modify it under the terms of the GNU Lesser General Public |
|
| 7 |
+ * License as published by the Free Software Foundation; either |
|
| 8 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
| 9 |
+ * |
|
| 10 |
+ * Libav is distributed in the hope that it will be useful, |
|
| 11 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
| 12 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
| 13 |
+ * Lesser General Public License for more details. |
|
| 14 |
+ * |
|
| 15 |
+ * You should have received a copy of the GNU Lesser General Public |
|
| 16 |
+ * License along with Libav; if not, write to the Free Software |
|
| 17 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
| 18 |
+ */ |
|
| 19 |
+ |
|
| 20 |
+#include <stdint.h> |
|
| 21 |
+ |
|
| 22 |
+#include "libavcodec/avcodec.h" |
|
| 23 |
+#include "libavcodec/rv34dsp.h" |
|
| 24 |
+/* Entry points generated by the rv40 instantiations of the h264_chroma_mc8/mc4 assembly macros; arguments are (dst, src, stride, h, x, y). */ |
|
| 25 |
+void ff_put_rv40_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int); |
|
| 26 |
+void ff_put_rv40_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int); |
|
| 27 |
+ |
|
| 28 |
+void ff_avg_rv40_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int); |
|
| 29 |
+void ff_avg_rv40_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int); |
|
| 30 |
+/* Point the chroma put/avg tables of the RV34DSPContext at the NEON routines: index 0 = mc8, index 1 = mc4. The dsp argument is not used in this function. */ |
|
| 31 |
+void ff_rv40dsp_init_neon(RV34DSPContext *c, DSPContext* dsp) |
|
| 32 |
+{
|
|
| 33 |
+ c->put_chroma_pixels_tab[0] = ff_put_rv40_chroma_mc8_neon; |
|
| 34 |
+ c->put_chroma_pixels_tab[1] = ff_put_rv40_chroma_mc4_neon; |
|
| 35 |
+ c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_neon; |
|
| 36 |
+ c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_neon; |
|
| 37 |
+} |
| ... | ... |
@@ -59,5 +59,6 @@ void ff_rv40dsp_init(RV34DSPContext *c, DSPContext* dsp); |
| 59 | 59 |
void ff_rv34dsp_init_neon(RV34DSPContext *c, DSPContext *dsp); |
| 60 | 60 |
|
| 61 | 61 |
void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp); |
| 62 |
+void ff_rv40dsp_init_neon(RV34DSPContext *c, DSPContext *dsp); |
|
| 62 | 63 |
|
| 63 | 64 |
#endif /* AVCODEC_RV34DSP_H */ |