Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
... | ... |
@@ -98,13 +98,8 @@ void add_pixels_clamped_mvi(const int16_t *block, uint8_t *pixels, |
98 | 98 |
|
99 | 99 |
av_cold void ff_dsputil_init_alpha(DSPContext *c, AVCodecContext *avctx) |
100 | 100 |
{ |
101 |
- const int high_bit_depth = avctx->bits_per_raw_sample > 8; |
|
102 |
- |
|
103 | 101 |
/* amask clears all bits that correspond to present features. */ |
104 | 102 |
if (amask(AMASK_MVI) == 0) { |
105 |
- if (!high_bit_depth) |
|
106 |
- c->get_pixels = get_pixels_mvi; |
|
107 |
- c->diff_pixels = diff_pixels_mvi; |
|
108 | 103 |
c->sad[0] = pix_abs16x16_mvi_asm; |
109 | 104 |
c->sad[1] = pix_abs8x8_mvi; |
110 | 105 |
c->pix_abs[0][0] = pix_abs16x16_mvi_asm; |
... | ... |
@@ -22,10 +22,6 @@ |
22 | 22 |
#include <stddef.h> |
23 | 23 |
#include <stdint.h> |
24 | 24 |
|
25 |
-void get_pixels_mvi(int16_t *restrict block, |
|
26 |
- const uint8_t *restrict pixels, int line_size); |
|
27 |
-void diff_pixels_mvi(int16_t *block, const uint8_t *s1, const uint8_t *s2, |
|
28 |
- int stride); |
|
29 | 25 |
int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); |
30 | 26 |
int pix_abs16x16_mvi_asm(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); |
31 | 27 |
int pix_abs16x16_x2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); |
... | ... |
@@ -22,51 +22,6 @@ |
22 | 22 |
#include "dsputil_alpha.h" |
23 | 23 |
#include "asm.h" |
24 | 24 |
|
25 |
-void get_pixels_mvi(int16_t *restrict block, |
|
26 |
- const uint8_t *restrict pixels, int line_size) |
|
27 |
-{ |
|
28 |
- int h = 8; |
|
29 |
- |
|
30 |
- do { |
|
31 |
- uint64_t p; |
|
32 |
- |
|
33 |
- p = ldq(pixels); |
|
34 |
- stq(unpkbw(p), block); |
|
35 |
- stq(unpkbw(p >> 32), block + 4); |
|
36 |
- |
|
37 |
- pixels += line_size; |
|
38 |
- block += 8; |
|
39 |
- } while (--h); |
|
40 |
-} |
|
41 |
- |
|
42 |
-void diff_pixels_mvi(int16_t *block, const uint8_t *s1, const uint8_t *s2, |
|
43 |
- int stride) { |
|
44 |
- int h = 8; |
|
45 |
- uint64_t mask = 0x4040; |
|
46 |
- |
|
47 |
- mask |= mask << 16; |
|
48 |
- mask |= mask << 32; |
|
49 |
- do { |
|
50 |
- uint64_t x, y, c, d, a; |
|
51 |
- uint64_t signs; |
|
52 |
- |
|
53 |
- x = ldq(s1); |
|
54 |
- y = ldq(s2); |
|
55 |
- c = cmpbge(x, y); |
|
56 |
- d = x - y; |
|
57 |
- a = zap(mask, c); /* We use 0x4040404040404040 here... */ |
|
58 |
- d += 4 * a; /* ...so we can use s4addq here. */ |
|
59 |
- signs = zap(-1, c); |
|
60 |
- |
|
61 |
- stq(unpkbw(d) | (unpkbw(signs) << 8), block); |
|
62 |
- stq(unpkbw(d >> 32) | (unpkbw(signs >> 32) << 8), block + 4); |
|
63 |
- |
|
64 |
- s1 += stride; |
|
65 |
- s2 += stride; |
|
66 |
- block += 8; |
|
67 |
- } while (--h); |
|
68 |
-} |
|
69 |
- |
|
70 | 25 |
static inline uint64_t avg2(uint64_t a, uint64_t b) |
71 | 26 |
{ |
72 | 27 |
return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1); |
73 | 28 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,79 @@ |
0 |
+/* |
|
1 |
+ * SIMD-optimized pixel operations |
|
2 |
+ * |
|
3 |
+ * This file is part of FFmpeg. |
|
4 |
+ * |
|
5 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
6 |
+ * modify it under the terms of the GNU Lesser General Public |
|
7 |
+ * License as published by the Free Software Foundation; either |
|
8 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
9 |
+ * |
|
10 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
11 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
12 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
13 |
+ * Lesser General Public License for more details. |
|
14 |
+ * |
|
15 |
+ * You should have received a copy of the GNU Lesser General Public |
|
16 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
17 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
18 |
+ */ |
|
19 |
+ |
|
20 |
+#include "libavutil/attributes.h" |
|
21 |
+#include "libavcodec/dsputil.h" |
|
22 |
+#include "libavcodec/pixblockdsp.h" |
|
23 |
+#include "asm.h" |
|
24 |
+ |
|
25 |
+static void get_pixels_mvi(int16_t *restrict block, |
|
26 |
+ const uint8_t *restrict pixels, int line_size) |
|
27 |
+{ |
|
28 |
+ int h = 8; |
|
29 |
+ |
|
30 |
+ do { |
|
31 |
+ uint64_t p; |
|
32 |
+ |
|
33 |
+ p = ldq(pixels); |
|
34 |
+ stq(unpkbw(p), block); |
|
35 |
+ stq(unpkbw(p >> 32), block + 4); |
|
36 |
+ |
|
37 |
+ pixels += line_size; |
|
38 |
+ block += 8; |
|
39 |
+ } while (--h); |
|
40 |
+} |
|
41 |
+ |
|
42 |
+static void diff_pixels_mvi(int16_t *block, const uint8_t *s1, const uint8_t *s2, |
|
43 |
+ int stride) { |
|
44 |
+ int h = 8; |
|
45 |
+ uint64_t mask = 0x4040; |
|
46 |
+ |
|
47 |
+ mask |= mask << 16; |
|
48 |
+ mask |= mask << 32; |
|
49 |
+ do { |
|
50 |
+ uint64_t x, y, c, d, a; |
|
51 |
+ uint64_t signs; |
|
52 |
+ |
|
53 |
+ x = ldq(s1); |
|
54 |
+ y = ldq(s2); |
|
55 |
+ c = cmpbge(x, y); |
|
56 |
+ d = x - y; |
|
57 |
+ a = zap(mask, c); /* We use 0x4040404040404040 here... */ |
|
58 |
+ d += 4 * a; /* ...so we can use s4addq here. */ |
|
59 |
+ signs = zap(-1, c); |
|
60 |
+ |
|
61 |
+ stq(unpkbw(d) | (unpkbw(signs) << 8), block); |
|
62 |
+ stq(unpkbw(d >> 32) | (unpkbw(signs >> 32) << 8), block + 4); |
|
63 |
+ |
|
64 |
+ s1 += stride; |
|
65 |
+ s2 += stride; |
|
66 |
+ block += 8; |
|
67 |
+ } while (--h); |
|
68 |
+} |
|
69 |
+ |
|
70 |
+av_cold void ff_pixblockdsp_init_alpha(PixblockDSPContext *c, AVCodecContext *avctx, |
|
71 |
+ unsigned high_bit_depth) |
|
72 |
+{ |
|
73 |
+ if (amask(AMASK_MVI) == 0) { |
|
74 |
+ if (!high_bit_depth) |
|
75 |
+ c->get_pixels = get_pixels_mvi; |
|
76 |
+ c->diff_pixels = diff_pixels_mvi; |
|
77 |
+ } |
|
78 |
+} |
... | ... |
@@ -71,6 +71,8 @@ av_cold void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx) |
71 | 71 |
break; |
72 | 72 |
} |
73 | 73 |
|
74 |
+ if (ARCH_ALPHA) |
|
75 |
+ ff_pixblockdsp_init_alpha(c, avctx, high_bit_depth); |
|
74 | 76 |
if (ARCH_ARM) |
75 | 77 |
ff_pixblockdsp_init_arm(c, avctx, high_bit_depth); |
76 | 78 |
if (ARCH_PPC) |
... | ... |
@@ -34,6 +34,8 @@ typedef struct PixblockDSPContext { |
34 | 34 |
} PixblockDSPContext; |
35 | 35 |
|
36 | 36 |
void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx); |
37 |
+void ff_pixblockdsp_init_alpha(PixblockDSPContext *c, AVCodecContext *avctx, |
|
38 |
+ unsigned high_bit_depth); |
|
37 | 39 |
void ff_pixblockdsp_init_arm(PixblockDSPContext *c, AVCodecContext *avctx, |
38 | 40 |
unsigned high_bit_depth); |
39 | 41 |
void ff_pixblockdsp_init_ppc(PixblockDSPContext *c, AVCodecContext *avctx, |