Originally committed as revision 711 to svn://svn.ffmpeg.org/ffmpeg/trunk
| ... | ... |
@@ -22,6 +22,8 @@ |
| 22 | 22 |
|
| 23 | 23 |
void simple_idct_axp(DCTELEM *block); |
| 24 | 24 |
|
| 25 |
+void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels, |
|
| 26 |
+ int line_size, int h); |
|
| 25 | 27 |
void put_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels, |
| 26 | 28 |
int line_size); |
| 27 | 29 |
void add_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels, |
| ... | ... |
@@ -232,12 +234,12 @@ static inline UINT64 avg4_no_rnd(UINT64 l1, UINT64 l2, UINT64 l3, UINT64 l4) |
| 232 | 232 |
|
| 233 | 233 |
void dsputil_init_alpha(void) |
| 234 | 234 |
{
|
| 235 |
- put_pixels_tab[0] = put_pixels_axp; |
|
| 235 |
+ put_pixels_tab[0] = put_pixels_axp_asm; |
|
| 236 | 236 |
put_pixels_tab[1] = put_pixels_x2_axp; |
| 237 | 237 |
put_pixels_tab[2] = put_pixels_y2_axp; |
| 238 | 238 |
put_pixels_tab[3] = put_pixels_xy2_axp; |
| 239 | 239 |
|
| 240 |
- put_no_rnd_pixels_tab[0] = put_pixels_axp; |
|
| 240 |
+ put_no_rnd_pixels_tab[0] = put_pixels_axp_asm; |
|
| 241 | 241 |
put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_axp; |
| 242 | 242 |
put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_axp; |
| 243 | 243 |
put_no_rnd_pixels_tab[3] = put_no_rnd_pixels_xy2_axp; |
| ... | ... |
@@ -44,6 +44,123 @@ |
| 44 | 44 |
.text |
| 45 | 45 |
|
| 46 | 46 |
/************************************************************************ |
| 47 |
+ * void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels, |
|
| 48 |
+ * int line_size, int h) |
+ * |
+ * Copies one quadword (8 bytes) per row from the address in a1 to the |
+ * address in a0, stepping both by a2 after each row, until the row |
+ * counter in a3 reaches zero. The low three bits of a1 select between |
+ * an unaligned path (4 rows per pass) and an aligned path (8 rows per |
+ * pass). Each loop exits only when a3 becomes exactly 0. |
+ * NOTE(review): a0=block, a1=pixels, a2=line_size, a3=h is assumed |
+ * from the prototype's argument order - confirm against the register |
+ * definitions used by this file. |
|
| 49 |
+ */ |
|
| 50 |
+ .align 6 |
|
| 51 |
+ .globl put_pixels_axp_asm |
|
| 52 |
+ .ent put_pixels_axp_asm |
|
| 53 |
+put_pixels_axp_asm: |
|
| 54 |
+ .frame sp, 0, ra |
|
| 55 |
+ .prologue 0 |
|
| 56 |
+ |
|
| 57 |
+#ifdef HAVE_GPROF |
|
| 58 |
+ lda AT, _mcount /* call to _mcount, assembled only when HAVE_GPROF is defined */ |
|
| 59 |
+ jsr AT, (AT), _mcount |
|
| 60 |
+#endif |
|
| 61 |
+ |
|
| 62 |
+ and a1, 7, t0 /* t0 = low three bits of a1 */ |
|
| 63 |
+ beq t0, $aligned /* zero: a1 is 8-byte aligned, take the plain-load path */ |
|
| 64 |
+ |
|
| 65 |
+ .align 4 |
|
| 66 |
+$unaligned: /* unaligned path: four rows per pass */ |
|
| 67 |
+ ldq_u t0, 0(a1) /* row 0: first of the two quadwords loaded for this row */ |
|
| 68 |
+ ldq_u t1, 8(a1) /* row 0: second quadword */ |
|
| 69 |
+ addq a1, a2, a1 /* step a1 by a2 to the next row */ |
|
| 70 |
+ nop |
|
| 71 |
+ |
|
| 72 |
+ ldq_u t2, 0(a1) /* row 1 */ |
|
| 73 |
+ ldq_u t3, 8(a1) |
|
| 74 |
+ addq a1, a2, a1 |
|
| 75 |
+ nop |
|
| 76 |
+ |
|
| 77 |
+ ldq_u t4, 0(a1) /* row 2 */ |
|
| 78 |
+ ldq_u t5, 8(a1) |
|
| 79 |
+ addq a1, a2, a1 |
|
| 80 |
+ nop |
|
| 81 |
+ |
|
| 82 |
+ ldq_u t6, 0(a1) /* row 3 */ |
|
| 83 |
+ ldq_u t7, 8(a1) |
|
| 84 |
+ extql t0, a1, t0 /* NOTE(review): the extracts below take their shift from a1 after it has been advanced by a2; this appears to assume a2 is a multiple of 8 so the low three bits are unchanged - confirm */ |
|
| 85 |
+ addq a1, a2, a1 |
|
| 86 |
+ |
|
| 87 |
+ extqh t1, a1, t1 |
|
| 88 |
+ addq a0, a2, t8 /* t8 = a0 + a2, destination of row 1 */ |
|
| 89 |
+ extql t2, a1, t2 |
|
| 90 |
+ addq t8, a2, t9 /* t9 = destination of row 2 */ |
|
| 91 |
+ |
|
| 92 |
+ extqh t3, a1, t3 |
|
| 93 |
+ addq t9, a2, ta /* ta = destination of row 3 */ |
|
| 94 |
+ extql t4, a1, t4 |
|
| 95 |
+ or t0, t1, t0 /* combine the two extracted parts of row 0 */ |
|
| 96 |
+ |
|
| 97 |
+ extqh t5, a1, t5 |
|
| 98 |
+ or t2, t3, t2 /* row 1 */ |
|
| 99 |
+ extql t6, a1, t6 |
|
| 100 |
+ or t4, t5, t4 /* row 2 */ |
|
| 101 |
+ |
|
| 102 |
+ extqh t7, a1, t7 |
|
| 103 |
+ or t6, t7, t6 /* row 3 */ |
|
| 104 |
+ stq t0, 0(a0) /* store the four assembled rows; NOTE(review): stq presumably needs an 8-byte aligned destination - confirm callers guarantee this */ |
|
| 105 |
+ stq t2, 0(t8) |
|
| 106 |
+ |
|
| 107 |
+ stq t4, 0(t9) |
|
| 108 |
+ subq a3, 4, a3 /* four rows consumed */ |
|
| 109 |
+ stq t6, 0(ta) |
|
| 110 |
+ addq ta, a2, a0 /* a0 = destination for the next pass */ |
|
| 111 |
+ |
|
| 112 |
+ bne a3, $unaligned /* repeat until a3 is exactly 0 */ |
|
| 113 |
+ ret |
|
| 114 |
+ |
|
| 115 |
+ .align 4 |
|
| 116 |
+$aligned: /* aligned path: eight rows per pass */ |
|
| 117 |
+ ldq t0, 0(a1) /* load rows 0-7, one quadword each, stepping a1 by a2 after every load */ |
|
| 118 |
+ addq a1, a2, a1 |
|
| 119 |
+ ldq t1, 0(a1) |
|
| 120 |
+ addq a1, a2, a1 |
|
| 121 |
+ |
|
| 122 |
+ ldq t2, 0(a1) |
|
| 123 |
+ addq a1, a2, a1 |
|
| 124 |
+ ldq t3, 0(a1) |
|
| 125 |
+ addq a1, a2, a1 |
|
| 126 |
+ |
|
| 127 |
+ ldq t4, 0(a1) |
|
| 128 |
+ addq a1, a2, a1 |
|
| 129 |
+ ldq t5, 0(a1) |
|
| 130 |
+ addq a1, a2, a1 |
|
| 131 |
+ |
|
| 132 |
+ ldq t6, 0(a1) |
|
| 133 |
+ addq a1, a2, a1 |
|
| 134 |
+ ldq t7, 0(a1) |
|
| 135 |
+ addq a1, a2, a1 |
|
| 136 |
+ |
|
| 137 |
+ addq a0, a2, t8 /* from here: each store is paired with computing the next row's destination */ |
|
| 138 |
+ stq t0, 0(a0) |
|
| 139 |
+ addq t8, a2, t9 |
|
| 140 |
+ stq t1, 0(t8) |
|
| 141 |
+ |
|
| 142 |
+ addq t9, a2, ta |
|
| 143 |
+ stq t2, 0(t9) |
|
| 144 |
+ addq ta, a2, tb |
|
| 145 |
+ stq t3, 0(ta) |
|
| 146 |
+ |
|
| 147 |
+ addq tb, a2, tc |
|
| 148 |
+ stq t4, 0(tb) |
|
| 149 |
+ addq tc, a2, td |
|
| 150 |
+ stq t5, 0(tc) |
|
| 151 |
+ |
|
| 152 |
+ addq td, a2, te |
|
| 153 |
+ stq t6, 0(td) |
|
| 154 |
+ addq te, a2, a0 /* a0 = destination for the next pass */ |
|
| 155 |
+ stq t7, 0(te) |
|
| 156 |
+ |
|
| 157 |
+ subq a3, 8, a3 /* eight rows consumed */ |
|
| 158 |
+ bne a3, $aligned /* repeat until a3 is exactly 0 */ |
|
| 159 |
+ |
|
| 160 |
+ ret |
|
| 161 |
+ .end put_pixels_axp_asm |
|
| 162 |
+ |
|
| 163 |
+/************************************************************************ |
|
| 47 | 164 |
* void put_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels, |
| 48 | 165 |
* int line_size) |
| 49 | 166 |
*/ |