Originally committed as revision 16675 to svn://svn.ffmpeg.org/ffmpeg/trunk
| ... | ... |
@@ -42,7 +42,7 @@ void get_pixels_mvi(DCTELEM *restrict block, |
| 42 | 42 |
void diff_pixels_mvi(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, |
| 43 | 43 |
int stride); |
| 44 | 44 |
int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); |
| 45 |
-int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size); |
|
| 45 |
+int pix_abs16x16_mvi_asm(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); |
|
| 46 | 46 |
int pix_abs16x16_x2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); |
| 47 | 47 |
int pix_abs16x16_y2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); |
| 48 | 48 |
int pix_abs16x16_xy2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); |
| ... | ... |
@@ -287,11 +287,6 @@ void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels, |
| 287 | 287 |
put_pixels_axp_asm(block + 8, pixels + 8, line_size, h); |
| 288 | 288 |
} |
| 289 | 289 |
|
| 290 |
-static int sad16x16_mvi(void *s, uint8_t *a, uint8_t *b, int stride) |
|
| 291 |
-{
|
|
| 292 |
- return pix_abs16x16_mvi_asm(a, b, stride); |
|
| 293 |
-} |
|
| 294 |
- |
|
| 295 | 290 |
void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx) |
| 296 | 291 |
{
|
| 297 | 292 |
c->put_pixels_tab[0][0] = put_pixels16_axp_asm; |
| ... | ... |
@@ -343,10 +338,9 @@ void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx) |
| 343 | 343 |
|
| 344 | 344 |
c->get_pixels = get_pixels_mvi; |
| 345 | 345 |
c->diff_pixels = diff_pixels_mvi; |
| 346 |
- c->sad[0] = sad16x16_mvi; |
|
| 346 |
+ c->sad[0] = pix_abs16x16_mvi_asm; |
|
| 347 | 347 |
c->sad[1] = pix_abs8x8_mvi; |
| 348 |
-// c->pix_abs[0][0] = pix_abs16x16_mvi_asm; //FIXME function arguments for the asm must be fixed |
|
| 349 |
- c->pix_abs[0][0] = sad16x16_mvi; |
|
| 348 |
+ c->pix_abs[0][0] = pix_abs16x16_mvi_asm; |
|
| 350 | 349 |
c->pix_abs[1][0] = pix_abs8x8_mvi; |
| 351 | 350 |
c->pix_abs[0][1] = pix_abs16x16_x2_mvi; |
| 352 | 351 |
c->pix_abs[0][2] = pix_abs16x16_y2_mvi; |
| ... | ... |
@@ -60,9 +60,8 @@ pix_abs16x16_mvi_asm: |
| 60 | 60 |
jsr AT, (AT), _mcount |
| 61 | 61 |
#endif |
| 62 | 62 |
|
| 63 |
- and a1, 7, t0 |
|
| 63 |
+ and a2, 7, t0 |
|
| 64 | 64 |
clr v0 |
| 65 |
- lda a3, 16 |
|
| 66 | 65 |
beq t0, $aligned |
| 67 | 66 |
.align 4 |
| 68 | 67 |
$unaligned: |
| ... | ... |
@@ -86,80 +85,80 @@ $unaligned: |
| 86 | 86 |
td: error right */ |
| 87 | 87 |
|
| 88 | 88 |
/* load line 0 */ |
| 89 |
- ldq_u t0, 0(a1) # left_u |
|
| 90 |
- ldq_u t1, 8(a1) # mid |
|
| 91 |
- ldq_u t2, 16(a1) # right_u |
|
| 92 |
- ldq t3, 0(a0) # ref left |
|
| 93 |
- ldq t4, 8(a0) # ref right |
|
| 94 |
- addq a0, a2, a0 # pix1 |
|
| 95 |
- addq a1, a2, a1 # pix2 |
|
| 89 |
+ ldq_u t0, 0(a2) # left_u |
|
| 90 |
+ ldq_u t1, 8(a2) # mid |
|
| 91 |
+ ldq_u t2, 16(a2) # right_u |
|
| 92 |
+ ldq t3, 0(a1) # ref left |
|
| 93 |
+ ldq t4, 8(a1) # ref right |
|
| 94 |
+ addq a1, a3, a1 # pix1 |
|
| 95 |
+ addq a2, a3, a2 # pix2 |
|
| 96 | 96 |
/* load line 1 */ |
| 97 |
- ldq_u t5, 0(a1) # left_u |
|
| 98 |
- ldq_u t6, 8(a1) # mid |
|
| 99 |
- ldq_u t7, 16(a1) # right_u |
|
| 100 |
- ldq t8, 0(a0) # ref left |
|
| 101 |
- ldq t9, 8(a0) # ref right |
|
| 102 |
- addq a0, a2, a0 # pix1 |
|
| 103 |
- addq a1, a2, a1 # pix2 |
|
| 97 |
+ ldq_u t5, 0(a2) # left_u |
|
| 98 |
+ ldq_u t6, 8(a2) # mid |
|
| 99 |
+ ldq_u t7, 16(a2) # right_u |
|
| 100 |
+ ldq t8, 0(a1) # ref left |
|
| 101 |
+ ldq t9, 8(a1) # ref right |
|
| 102 |
+ addq a1, a3, a1 # pix1 |
|
| 103 |
+ addq a2, a3, a2 # pix2 |
|
| 104 | 104 |
/* calc line 0 */ |
| 105 |
- extql t0, a1, t0 # left lo |
|
| 106 |
- extqh t1, a1, ta # left hi |
|
| 107 |
- extql t1, a1, tb # right lo |
|
| 105 |
+ extql t0, a2, t0 # left lo |
|
| 106 |
+ extqh t1, a2, ta # left hi |
|
| 107 |
+ extql t1, a2, tb # right lo |
|
| 108 | 108 |
or t0, ta, t0 # left |
| 109 |
- extqh t2, a1, t2 # right hi |
|
| 109 |
+ extqh t2, a2, t2 # right hi |
|
| 110 | 110 |
perr t3, t0, tc # error left |
| 111 | 111 |
or t2, tb, t2 # right |
| 112 | 112 |
perr t4, t2, td # error right |
| 113 | 113 |
addq v0, tc, v0 # add error left |
| 114 | 114 |
addq v0, td, v0 # add error right |
| 115 | 115 |
/* calc line 1 */ |
| 116 |
- extql t5, a1, t5 # left lo |
|
| 117 |
- extqh t6, a1, ta # left hi |
|
| 118 |
- extql t6, a1, tb # right lo |
|
| 116 |
+ extql t5, a2, t5 # left lo |
|
| 117 |
+ extqh t6, a2, ta # left hi |
|
| 118 |
+ extql t6, a2, tb # right lo |
|
| 119 | 119 |
or t5, ta, t5 # left |
| 120 |
- extqh t7, a1, t7 # right hi |
|
| 120 |
+ extqh t7, a2, t7 # right hi |
|
| 121 | 121 |
perr t8, t5, tc # error left |
| 122 | 122 |
or t7, tb, t7 # right |
| 123 | 123 |
perr t9, t7, td # error right |
| 124 | 124 |
addq v0, tc, v0 # add error left |
| 125 | 125 |
addq v0, td, v0 # add error right |
| 126 | 126 |
/* loop */ |
| 127 |
- subq a3, 2, a3 # h -= 2 |
|
| 128 |
- bne a3, $unaligned |
|
| 127 |
+ subq a4, 2, a4 # h -= 2 |
|
| 128 |
+ bne a4, $unaligned |
|
| 129 | 129 |
ret |
| 130 | 130 |
|
| 131 | 131 |
.align 4 |
| 132 | 132 |
$aligned: |
| 133 | 133 |
/* load line 0 */ |
| 134 |
- ldq t0, 0(a1) # left |
|
| 135 |
- ldq t1, 8(a1) # right |
|
| 136 |
- addq a1, a2, a1 # pix2 |
|
| 137 |
- ldq t2, 0(a0) # ref left |
|
| 138 |
- ldq t3, 8(a0) # ref right |
|
| 139 |
- addq a0, a2, a0 # pix1 |
|
| 134 |
+ ldq t0, 0(a2) # left |
|
| 135 |
+ ldq t1, 8(a2) # right |
|
| 136 |
+ addq a2, a3, a2 # pix2 |
|
| 137 |
+ ldq t2, 0(a1) # ref left |
|
| 138 |
+ ldq t3, 8(a1) # ref right |
|
| 139 |
+ addq a1, a3, a1 # pix1 |
|
| 140 | 140 |
/* load line 1 */ |
| 141 |
- ldq t4, 0(a1) # left |
|
| 142 |
- ldq t5, 8(a1) # right |
|
| 143 |
- addq a1, a2, a1 # pix2 |
|
| 144 |
- ldq t6, 0(a0) # ref left |
|
| 145 |
- ldq t7, 8(a0) # ref right |
|
| 146 |
- addq a0, a2, a0 # pix1 |
|
| 141 |
+ ldq t4, 0(a2) # left |
|
| 142 |
+ ldq t5, 8(a2) # right |
|
| 143 |
+ addq a2, a3, a2 # pix2 |
|
| 144 |
+ ldq t6, 0(a1) # ref left |
|
| 145 |
+ ldq t7, 8(a1) # ref right |
|
| 146 |
+ addq a1, a3, a1 # pix1 |
|
| 147 | 147 |
/* load line 2 */ |
| 148 |
- ldq t8, 0(a1) # left |
|
| 149 |
- ldq t9, 8(a1) # right |
|
| 150 |
- addq a1, a2, a1 # pix2 |
|
| 151 |
- ldq ta, 0(a0) # ref left |
|
| 152 |
- ldq tb, 8(a0) # ref right |
|
| 153 |
- addq a0, a2, a0 # pix1 |
|
| 148 |
+ ldq t8, 0(a2) # left |
|
| 149 |
+ ldq t9, 8(a2) # right |
|
| 150 |
+ addq a2, a3, a2 # pix2 |
|
| 151 |
+ ldq ta, 0(a1) # ref left |
|
| 152 |
+ ldq tb, 8(a1) # ref right |
|
| 153 |
+ addq a1, a3, a1 # pix1 |
|
| 154 | 154 |
/* load line 3 */ |
| 155 |
- ldq tc, 0(a1) # left |
|
| 156 |
- ldq td, 8(a1) # right |
|
| 157 |
- addq a1, a2, a1 # pix2 |
|
| 158 |
- ldq te, 0(a0) # ref left |
|
| 159 |
- ldq tf, 8(a0) # ref right |
|
| 155 |
+ ldq tc, 0(a2) # left |
|
| 156 |
+ ldq td, 8(a2) # right |
|
| 157 |
+ addq a2, a3, a2 # pix2 |
|
| 158 |
+ ldq te, 0(a1) # ref left |
|
| 159 |
+ ldq a0, 8(a1) # ref right |
|
| 160 | 160 |
/* calc line 0 */ |
| 161 | 161 |
perr t0, t2, t0 # error left |
| 162 |
- addq a0, a2, a0 # pix1 |
|
| 162 |
+ addq a1, a3, a1 # pix1 |
|
| 163 | 163 |
perr t1, t3, t1 # error right |
| 164 | 164 |
addq v0, t0, v0 # add error left |
| 165 | 165 |
/* calc line 1 */ |
| ... | ... |
@@ -175,11 +174,11 @@ $aligned: |
| 175 | 175 |
/* calc line 3 */ |
| 176 | 176 |
perr tc, te, t0 # error left |
| 177 | 177 |
addq v0, t1, v0 # add error right |
| 178 |
- perr td, tf, t1 # error right |
|
| 178 |
+ perr td, a0, t1 # error right |
|
| 179 | 179 |
addq v0, t0, v0 # add error left |
| 180 | 180 |
addq v0, t1, v0 # add error right |
| 181 | 181 |
/* loop */ |
| 182 |
- subq a3, 4, a3 # h -= 4 |
|
| 183 |
- bne a3, $aligned |
|
| 182 |
+ subq a4, 4, a4 # h -= 4 |
|
| 183 |
+ bne a4, $aligned |
|
| 184 | 184 |
ret |
| 185 | 185 |
.end pix_abs16x16_mvi_asm |