Signed-off-by: James Almer <jamrial@gmail.com>
James Almer authored on 2017/01/08 11:13:48... | ... |
@@ -2115,6 +2115,7 @@ CONFIG_EXTRA=" |
2115 | 2115 |
libx262 |
2116 | 2116 |
llauddsp |
2117 | 2117 |
llviddsp |
2118 |
+ llvidencdsp |
|
2118 | 2119 |
lpc |
2119 | 2120 |
lzf |
2120 | 2121 |
me_cmp |
... | ... |
@@ -2366,7 +2367,7 @@ amv_decoder_select="sp5x_decoder exif" |
2366 | 2366 |
amv_encoder_select="aandcttables jpegtables mpegvideoenc" |
2367 | 2367 |
ape_decoder_select="bswapdsp llauddsp" |
2368 | 2368 |
apng_decoder_select="zlib" |
2369 |
-apng_encoder_select="huffyuvencdsp zlib" |
|
2369 |
+apng_encoder_select="llvidencdsp zlib" |
|
2370 | 2370 |
asv1_decoder_select="blockdsp bswapdsp idctdsp" |
2371 | 2371 |
asv1_encoder_select="bswapdsp fdctdsp pixblockdsp" |
2372 | 2372 |
asv2_decoder_select="blockdsp bswapdsp idctdsp" |
... | ... |
@@ -2430,7 +2431,7 @@ hap_encoder_deps="libsnappy" |
2430 | 2430 |
hap_encoder_select="texturedspenc" |
2431 | 2431 |
hevc_decoder_select="bswapdsp cabac golomb videodsp" |
2432 | 2432 |
huffyuv_decoder_select="bswapdsp huffyuvdsp llviddsp" |
2433 |
-huffyuv_encoder_select="bswapdsp huffman huffyuvencdsp" |
|
2433 |
+huffyuv_encoder_select="bswapdsp huffman huffyuvencdsp llvidencdsp" |
|
2434 | 2434 |
iac_decoder_select="imc_decoder" |
2435 | 2435 |
imc_decoder_select="bswapdsp fft mdct sinewin" |
2436 | 2436 |
indeo3_decoder_select="hpeldsp" |
... | ... |
@@ -2491,7 +2492,7 @@ on2avc_decoder_select="mdct" |
2491 | 2491 |
opus_decoder_deps="swresample" |
2492 | 2492 |
opus_decoder_select="imdct15" |
2493 | 2493 |
png_decoder_select="zlib" |
2494 |
-png_encoder_select="huffyuvencdsp zlib" |
|
2494 |
+png_encoder_select="llvidencdsp zlib" |
|
2495 | 2495 |
prores_decoder_select="blockdsp idctdsp" |
2496 | 2496 |
prores_encoder_select="fdctdsp" |
2497 | 2497 |
qcelp_decoder_select="lsp" |
... | ... |
@@ -2534,7 +2535,7 @@ tscc_decoder_select="zlib" |
2534 | 2534 |
twinvq_decoder_select="mdct lsp sinewin" |
2535 | 2535 |
txd_decoder_select="texturedsp" |
2536 | 2536 |
utvideo_decoder_select="bswapdsp llviddsp" |
2537 |
-utvideo_encoder_select="bswapdsp huffman huffyuvencdsp" |
|
2537 |
+utvideo_encoder_select="bswapdsp huffman llvidencdsp" |
|
2538 | 2538 |
vble_decoder_select="llviddsp" |
2539 | 2539 |
vc1_decoder_select="blockdsp h263_decoder h264qpel intrax8 mpegvideo vc1dsp" |
2540 | 2540 |
vc1_qsv_decoder_deps="libmfx" |
... | ... |
@@ -91,6 +91,7 @@ OBJS-$(CONFIG_JPEGTABLES) += jpegtables.o |
91 | 91 |
OBJS-$(CONFIG_LIBXVID) += libxvid_rc.o |
92 | 92 |
OBJS-$(CONFIG_LLAUDDSP) += lossless_audiodsp.o |
93 | 93 |
OBJS-$(CONFIG_LLVIDDSP) += lossless_videodsp.o |
94 |
+OBJS-$(CONFIG_LLVIDENCDSP) += lossless_videoencdsp.o |
|
94 | 95 |
OBJS-$(CONFIG_LPC) += lpc.o |
95 | 96 |
OBJS-$(CONFIG_LSP) += lsp.o |
96 | 97 |
OBJS-$(CONFIG_LZF) += lzf.o |
... | ... |
@@ -38,6 +38,7 @@ |
38 | 38 |
#include "huffyuvencdsp.h" |
39 | 39 |
#include "put_bits.h" |
40 | 40 |
#include "lossless_videodsp.h" |
41 |
+#include "lossless_videoencdsp.h" |
|
41 | 42 |
|
42 | 43 |
#define VLC_BITS 12 |
43 | 44 |
|
... | ... |
@@ -89,6 +90,7 @@ typedef struct HYuvContext { |
89 | 89 |
HuffYUVDSPContext hdsp; |
90 | 90 |
HuffYUVEncDSPContext hencdsp; |
91 | 91 |
LLVidDSPContext llviddsp; |
92 |
+ LLVidEncDSPContext llvidencdsp; |
|
92 | 93 |
int non_determ; // non-deterministic, multi-threaded encoder allowed |
93 | 94 |
} HYuvContext; |
94 | 95 |
|
... | ... |
@@ -33,6 +33,7 @@ |
33 | 33 |
#include "huffman.h" |
34 | 34 |
#include "huffyuvencdsp.h" |
35 | 35 |
#include "internal.h" |
36 |
+#include "lossless_videoencdsp.h" |
|
36 | 37 |
#include "put_bits.h" |
37 | 38 |
#include "libavutil/opt.h" |
38 | 39 |
#include "libavutil/pixdesc.h" |
... | ... |
@@ -41,7 +42,7 @@ static inline void diff_bytes(HYuvContext *s, uint8_t *dst, |
41 | 41 |
const uint8_t *src0, const uint8_t *src1, int w) |
42 | 42 |
{ |
43 | 43 |
if (s->bps <= 8) { |
44 |
- s->hencdsp.diff_bytes(dst, src0, src1, w); |
|
44 |
+ s->llvidencdsp.diff_bytes(dst, src0, src1, w); |
|
45 | 45 |
} else { |
46 | 46 |
s->hencdsp.diff_int16((uint16_t *)dst, (const uint16_t *)src0, (const uint16_t *)src1, s->n - 1, w); |
47 | 47 |
} |
... | ... |
@@ -65,7 +66,7 @@ static inline int sub_left_prediction(HYuvContext *s, uint8_t *dst, |
65 | 65 |
dst[i] = temp - left; |
66 | 66 |
left = temp; |
67 | 67 |
} |
68 |
- s->hencdsp.diff_bytes(dst + 32, src + 32, src + 31, w - 32); |
|
68 |
+ s->llvidencdsp.diff_bytes(dst + 32, src + 32, src + 31, w - 32); |
|
69 | 69 |
return src[w-1]; |
70 | 70 |
} |
71 | 71 |
} else { |
... | ... |
@@ -117,7 +118,7 @@ static inline void sub_left_prediction_bgr32(HYuvContext *s, uint8_t *dst, |
117 | 117 |
a = at; |
118 | 118 |
} |
119 | 119 |
|
120 |
- s->hencdsp.diff_bytes(dst + 16, src + 16, src + 12, w * 4 - 16); |
|
120 |
+ s->llvidencdsp.diff_bytes(dst + 16, src + 16, src + 12, w * 4 - 16); |
|
121 | 121 |
|
122 | 122 |
*red = src[(w - 1) * 4 + R]; |
123 | 123 |
*green = src[(w - 1) * 4 + G]; |
... | ... |
@@ -146,7 +147,7 @@ static inline void sub_left_prediction_rgb24(HYuvContext *s, uint8_t *dst, |
146 | 146 |
b = bt; |
147 | 147 |
} |
148 | 148 |
|
149 |
- s->hencdsp.diff_bytes(dst + 48, src + 48, src + 48 - 3, w * 3 - 48); |
|
149 |
+ s->llvidencdsp.diff_bytes(dst + 48, src + 48, src + 48 - 3, w * 3 - 48); |
|
150 | 150 |
|
151 | 151 |
*red = src[(w - 1) * 3 + 0]; |
152 | 152 |
*green = src[(w - 1) * 3 + 1]; |
... | ... |
@@ -156,7 +157,7 @@ static inline void sub_left_prediction_rgb24(HYuvContext *s, uint8_t *dst, |
156 | 156 |
static void sub_median_prediction(HYuvContext *s, uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top) |
157 | 157 |
{ |
158 | 158 |
if (s->bps <= 8) { |
159 |
- s->hencdsp.sub_hfyu_median_pred(dst, src1, src2, w , left, left_top); |
|
159 |
+ s->llvidencdsp.sub_median_pred(dst, src1, src2, w , left, left_top); |
|
160 | 160 |
} else { |
161 | 161 |
s->hencdsp.sub_hfyu_median_pred_int16((uint16_t *)dst, (const uint16_t *)src1, (const uint16_t *)src2, s->n - 1, w , left, left_top); |
162 | 162 |
} |
... | ... |
@@ -218,6 +219,7 @@ static av_cold int encode_init(AVCodecContext *avctx) |
218 | 218 |
|
219 | 219 |
ff_huffyuv_common_init(avctx); |
220 | 220 |
ff_huffyuvencdsp_init(&s->hencdsp, avctx); |
221 |
+ ff_llvidencdsp_init(&s->llvidencdsp); |
|
221 | 222 |
|
222 | 223 |
avctx->extradata = av_mallocz(3*MAX_N + 4); |
223 | 224 |
if (s->flags&AV_CODEC_FLAG_PASS1) { |
... | ... |
@@ -823,9 +825,9 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, |
823 | 823 |
lefttopy = p->data[0][3]; |
824 | 824 |
lefttopu = p->data[1][1]; |
825 | 825 |
lefttopv = p->data[2][1]; |
826 |
- s->hencdsp.sub_hfyu_median_pred(s->temp[0], p->data[0] + 4, p->data[0] + fake_ystride + 4, width - 4, &lefty, &lefttopy); |
|
827 |
- s->hencdsp.sub_hfyu_median_pred(s->temp[1], p->data[1] + 2, p->data[1] + fake_ustride + 2, width2 - 2, &leftu, &lefttopu); |
|
828 |
- s->hencdsp.sub_hfyu_median_pred(s->temp[2], p->data[2] + 2, p->data[2] + fake_vstride + 2, width2 - 2, &leftv, &lefttopv); |
|
826 |
+ s->llvidencdsp.sub_median_pred(s->temp[0], p->data[0] + 4, p->data[0] + fake_ystride + 4, width - 4, &lefty, &lefttopy); |
|
827 |
+ s->llvidencdsp.sub_median_pred(s->temp[1], p->data[1] + 2, p->data[1] + fake_ustride + 2, width2 - 2, &leftu, &lefttopu); |
|
828 |
+ s->llvidencdsp.sub_median_pred(s->temp[2], p->data[2] + 2, p->data[2] + fake_vstride + 2, width2 - 2, &leftv, &lefttopv); |
|
829 | 829 |
encode_422_bitstream(s, 0, width - 4); |
830 | 830 |
y++; cy++; |
831 | 831 |
|
... | ... |
@@ -835,7 +837,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, |
835 | 835 |
if (s->bitstream_bpp == 12) { |
836 | 836 |
while (2 * cy > y) { |
837 | 837 |
ydst = p->data[0] + p->linesize[0] * y; |
838 |
- s->hencdsp.sub_hfyu_median_pred(s->temp[0], ydst - fake_ystride, ydst, width, &lefty, &lefttopy); |
|
838 |
+ s->llvidencdsp.sub_median_pred(s->temp[0], ydst - fake_ystride, ydst, width, &lefty, &lefttopy); |
|
839 | 839 |
encode_gray_bitstream(s, width); |
840 | 840 |
y++; |
841 | 841 |
} |
... | ... |
@@ -845,9 +847,9 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, |
845 | 845 |
udst = p->data[1] + p->linesize[1] * cy; |
846 | 846 |
vdst = p->data[2] + p->linesize[2] * cy; |
847 | 847 |
|
848 |
- s->hencdsp.sub_hfyu_median_pred(s->temp[0], ydst - fake_ystride, ydst, width, &lefty, &lefttopy); |
|
849 |
- s->hencdsp.sub_hfyu_median_pred(s->temp[1], udst - fake_ustride, udst, width2, &leftu, &lefttopu); |
|
850 |
- s->hencdsp.sub_hfyu_median_pred(s->temp[2], vdst - fake_vstride, vdst, width2, &leftv, &lefttopv); |
|
848 |
+ s->llvidencdsp.sub_median_pred(s->temp[0], ydst - fake_ystride, ydst, width, &lefty, &lefttopy); |
|
849 |
+ s->llvidencdsp.sub_median_pred(s->temp[1], udst - fake_ustride, udst, width2, &leftu, &lefttopu); |
|
850 |
+ s->llvidencdsp.sub_median_pred(s->temp[2], vdst - fake_vstride, vdst, width2, &leftv, &lefttopv); |
|
851 | 851 |
|
852 | 852 |
encode_422_bitstream(s, 0, width); |
853 | 853 |
} |
... | ... |
@@ -860,7 +862,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, |
860 | 860 |
ydst = p->data[0] + p->linesize[0] * y; |
861 | 861 |
|
862 | 862 |
if (s->predictor == PLANE && s->interlaced < y) { |
863 |
- s->hencdsp.diff_bytes(s->temp[1], ydst, ydst - fake_ystride, width); |
|
863 |
+ s->llvidencdsp.diff_bytes(s->temp[1], ydst, ydst - fake_ystride, width); |
|
864 | 864 |
|
865 | 865 |
lefty = sub_left_prediction(s, s->temp[0], s->temp[1], width , lefty); |
866 | 866 |
} else { |
... | ... |
@@ -876,9 +878,9 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, |
876 | 876 |
vdst = p->data[2] + p->linesize[2] * cy; |
877 | 877 |
|
878 | 878 |
if (s->predictor == PLANE && s->interlaced < cy) { |
879 |
- s->hencdsp.diff_bytes(s->temp[1], ydst, ydst - fake_ystride, width); |
|
880 |
- s->hencdsp.diff_bytes(s->temp[2], udst, udst - fake_ustride, width2); |
|
881 |
- s->hencdsp.diff_bytes(s->temp[2] + width2, vdst, vdst - fake_vstride, width2); |
|
879 |
+ s->llvidencdsp.diff_bytes(s->temp[1], ydst, ydst - fake_ystride, width); |
|
880 |
+ s->llvidencdsp.diff_bytes(s->temp[2], udst, udst - fake_ustride, width2); |
|
881 |
+ s->llvidencdsp.diff_bytes(s->temp[2] + width2, vdst, vdst - fake_vstride, width2); |
|
882 | 882 |
|
883 | 883 |
lefty = sub_left_prediction(s, s->temp[0], s->temp[1], width , lefty); |
884 | 884 |
leftu = sub_left_prediction(s, s->temp[1], s->temp[2], width2, leftu); |
... | ... |
@@ -911,7 +913,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, |
911 | 911 |
for (y = 1; y < s->height; y++) { |
912 | 912 |
uint8_t *dst = data + y*stride; |
913 | 913 |
if (s->predictor == PLANE && s->interlaced < y) { |
914 |
- s->hencdsp.diff_bytes(s->temp[1], dst, dst - fake_stride, width * 4); |
|
914 |
+ s->llvidencdsp.diff_bytes(s->temp[1], dst, dst - fake_stride, width * 4); |
|
915 | 915 |
sub_left_prediction_bgr32(s, s->temp[0], s->temp[1], width, |
916 | 916 |
&leftr, &leftg, &leftb, &lefta); |
917 | 917 |
} else { |
... | ... |
@@ -939,7 +941,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, |
939 | 939 |
for (y = 1; y < s->height; y++) { |
940 | 940 |
uint8_t *dst = data + y * stride; |
941 | 941 |
if (s->predictor == PLANE && s->interlaced < y) { |
942 |
- s->hencdsp.diff_bytes(s->temp[1], dst, dst - fake_stride, |
|
942 |
+ s->llvidencdsp.diff_bytes(s->temp[1], dst, dst - fake_stride, |
|
943 | 943 |
width * 3); |
944 | 944 |
sub_left_prediction_rgb24(s, s->temp[0], s->temp[1], width, |
945 | 945 |
&leftr, &leftg, &leftb); |
... | ... |
@@ -21,38 +21,6 @@ |
21 | 21 |
#include "huffyuvencdsp.h" |
22 | 22 |
#include "mathops.h" |
23 | 23 |
|
24 |
-// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size |
|
25 |
-#define pb_7f (~0UL / 255 * 0x7f) |
|
26 |
-#define pb_80 (~0UL / 255 * 0x80) |
|
27 |
- |
|
28 |
-static void diff_bytes_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, intptr_t w) |
|
29 |
-{ |
|
30 |
- long i; |
|
31 |
- |
|
32 |
-#if !HAVE_FAST_UNALIGNED |
|
33 |
- if (((long)src1 | (long)src2) & (sizeof(long) - 1)) { |
|
34 |
- for (i = 0; i + 7 < w; i += 8) { |
|
35 |
- dst[i + 0] = src1[i + 0] - src2[i + 0]; |
|
36 |
- dst[i + 1] = src1[i + 1] - src2[i + 1]; |
|
37 |
- dst[i + 2] = src1[i + 2] - src2[i + 2]; |
|
38 |
- dst[i + 3] = src1[i + 3] - src2[i + 3]; |
|
39 |
- dst[i + 4] = src1[i + 4] - src2[i + 4]; |
|
40 |
- dst[i + 5] = src1[i + 5] - src2[i + 5]; |
|
41 |
- dst[i + 6] = src1[i + 6] - src2[i + 6]; |
|
42 |
- dst[i + 7] = src1[i + 7] - src2[i + 7]; |
|
43 |
- } |
|
44 |
- } else |
|
45 |
-#endif |
|
46 |
- for (i = 0; i <= w - (int) sizeof(long); i += sizeof(long)) { |
|
47 |
- long a = *(long *) (src1 + i); |
|
48 |
- long b = *(long *) (src2 + i); |
|
49 |
- *(long *) (dst + i) = ((a | pb_80) - (b & pb_7f)) ^ |
|
50 |
- ((a ^ b ^ pb_80) & pb_80); |
|
51 |
- } |
|
52 |
- for (; i < w; i++) |
|
53 |
- dst[i + 0] = src1[i + 0] - src2[i + 0]; |
|
54 |
-} |
|
55 |
- |
|
56 | 24 |
static void diff_int16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w){ |
57 | 25 |
long i; |
58 | 26 |
#if !HAVE_FAST_UNALIGNED |
... | ... |
@@ -79,27 +47,6 @@ static void diff_int16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *sr |
79 | 79 |
dst[i] = (src1[i] - src2[i]) & mask; |
80 | 80 |
} |
81 | 81 |
|
82 |
-static void sub_hfyu_median_pred_c(uint8_t *dst, const uint8_t *src1, |
|
83 |
- const uint8_t *src2, intptr_t w, |
|
84 |
- int *left, int *left_top) |
|
85 |
-{ |
|
86 |
- int i; |
|
87 |
- uint8_t l, lt; |
|
88 |
- |
|
89 |
- l = *left; |
|
90 |
- lt = *left_top; |
|
91 |
- |
|
92 |
- for (i = 0; i < w; i++) { |
|
93 |
- const int pred = mid_pred(l, src1[i], (l + src1[i] - lt) & 0xFF); |
|
94 |
- lt = src1[i]; |
|
95 |
- l = src2[i]; |
|
96 |
- dst[i] = l - pred; |
|
97 |
- } |
|
98 |
- |
|
99 |
- *left = l; |
|
100 |
- *left_top = lt; |
|
101 |
-} |
|
102 |
- |
|
103 | 82 |
static void sub_hfyu_median_pred_int16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w, int *left, int *left_top){ |
104 | 83 |
int i; |
105 | 84 |
uint16_t l, lt; |
... | ... |
@@ -120,9 +67,7 @@ static void sub_hfyu_median_pred_int16_c(uint16_t *dst, const uint16_t *src1, co |
120 | 120 |
|
121 | 121 |
av_cold void ff_huffyuvencdsp_init(HuffYUVEncDSPContext *c, AVCodecContext *avctx) |
122 | 122 |
{ |
123 |
- c->diff_bytes = diff_bytes_c; |
|
124 | 123 |
c->diff_int16 = diff_int16_c; |
125 |
- c->sub_hfyu_median_pred = sub_hfyu_median_pred_c; |
|
126 | 124 |
c->sub_hfyu_median_pred_int16 = sub_hfyu_median_pred_int16_c; |
127 | 125 |
|
128 | 126 |
if (ARCH_X86) |
... | ... |
@@ -24,22 +24,11 @@ |
24 | 24 |
#include "avcodec.h" |
25 | 25 |
|
26 | 26 |
typedef struct HuffYUVEncDSPContext { |
27 |
- void (*diff_bytes)(uint8_t *dst /* align 16 */, |
|
28 |
- const uint8_t *src1 /* align 16 */, |
|
29 |
- const uint8_t *src2 /* align 1 */, |
|
30 |
- intptr_t w); |
|
31 | 27 |
void (*diff_int16)(uint16_t *dst /* align 16 */, |
32 | 28 |
const uint16_t *src1 /* align 16 */, |
33 | 29 |
const uint16_t *src2 /* align 1 */, |
34 | 30 |
unsigned mask, int w); |
35 | 31 |
|
36 |
- /** |
|
37 |
- * Subtract HuffYUV's variant of median prediction. |
|
38 |
- * Note, this might read from src1[-1], src2[-1]. |
|
39 |
- */ |
|
40 |
- void (*sub_hfyu_median_pred)(uint8_t *dst, const uint8_t *src1, |
|
41 |
- const uint8_t *src2, intptr_t w, |
|
42 |
- int *left, int *left_top); |
|
43 | 32 |
void (*sub_hfyu_median_pred_int16)(uint16_t *dst, const uint16_t *src1, |
44 | 33 |
const uint16_t *src2, unsigned mask, |
45 | 34 |
int w, int *left, int *left_top); |
46 | 35 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,84 @@ |
0 |
+/* |
|
1 |
+ * This file is part of FFmpeg. |
|
2 |
+ * |
|
3 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
4 |
+ * modify it under the terms of the GNU Lesser General Public |
|
5 |
+ * License as published by the Free Software Foundation; either |
|
6 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
7 |
+ * |
|
8 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
9 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
10 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
11 |
+ * Lesser General Public License for more details. |
|
12 |
+ * |
|
13 |
+ * You should have received a copy of the GNU Lesser General Public |
|
14 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
15 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
16 |
+ */ |
|
17 |
+ |
|
18 |
+#include "config.h" |
|
19 |
+#include "libavutil/attributes.h" |
|
20 |
+#include "lossless_videoencdsp.h" |
|
21 |
+#include "mathops.h" |
|
22 |
+ |
|
23 |
+// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size |
|
24 |
+#define pb_7f (~0UL / 255 * 0x7f) |
|
25 |
+#define pb_80 (~0UL / 255 * 0x80) |
|
26 |
+ |
|
27 |
+static void diff_bytes_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, intptr_t w) |
|
28 |
+{ |
|
29 |
+ long i; |
|
30 |
+ |
|
31 |
+#if !HAVE_FAST_UNALIGNED |
|
32 |
+ if (((long)src1 | (long)src2) & (sizeof(long) - 1)) { |
|
33 |
+ for (i = 0; i + 7 < w; i += 8) { |
|
34 |
+ dst[i + 0] = src1[i + 0] - src2[i + 0]; |
|
35 |
+ dst[i + 1] = src1[i + 1] - src2[i + 1]; |
|
36 |
+ dst[i + 2] = src1[i + 2] - src2[i + 2]; |
|
37 |
+ dst[i + 3] = src1[i + 3] - src2[i + 3]; |
|
38 |
+ dst[i + 4] = src1[i + 4] - src2[i + 4]; |
|
39 |
+ dst[i + 5] = src1[i + 5] - src2[i + 5]; |
|
40 |
+ dst[i + 6] = src1[i + 6] - src2[i + 6]; |
|
41 |
+ dst[i + 7] = src1[i + 7] - src2[i + 7]; |
|
42 |
+ } |
|
43 |
+ } else |
|
44 |
+#endif |
|
45 |
+ for (i = 0; i <= w - (int) sizeof(long); i += sizeof(long)) { |
|
46 |
+ long a = *(long *) (src1 + i); |
|
47 |
+ long b = *(long *) (src2 + i); |
|
48 |
+ *(long *) (dst + i) = ((a | pb_80) - (b & pb_7f)) ^ |
|
49 |
+ ((a ^ b ^ pb_80) & pb_80); |
|
50 |
+ } |
|
51 |
+ for (; i < w; i++) |
|
52 |
+ dst[i + 0] = src1[i + 0] - src2[i + 0]; |
|
53 |
+} |
|
54 |
+ |
|
55 |
+static void sub_median_pred_c(uint8_t *dst, const uint8_t *src1, |
|
56 |
+ const uint8_t *src2, intptr_t w, |
|
57 |
+ int *left, int *left_top) |
|
58 |
+{ |
|
59 |
+ int i; |
|
60 |
+ uint8_t l, lt; |
|
61 |
+ |
|
62 |
+ l = *left; |
|
63 |
+ lt = *left_top; |
|
64 |
+ |
|
65 |
+ for (i = 0; i < w; i++) { |
|
66 |
+ const int pred = mid_pred(l, src1[i], (l + src1[i] - lt) & 0xFF); |
|
67 |
+ lt = src1[i]; |
|
68 |
+ l = src2[i]; |
|
69 |
+ dst[i] = l - pred; |
|
70 |
+ } |
|
71 |
+ |
|
72 |
+ *left = l; |
|
73 |
+ *left_top = lt; |
|
74 |
+} |
|
75 |
+ |
|
76 |
+av_cold void ff_llvidencdsp_init(LLVidEncDSPContext *c) |
|
77 |
+{ |
|
78 |
+ c->diff_bytes = diff_bytes_c; |
|
79 |
+ c->sub_median_pred = sub_median_pred_c; |
|
80 |
+ |
|
81 |
+ if (ARCH_X86) |
|
82 |
+ ff_llvidencdsp_init_x86(c); |
|
83 |
+} |
0 | 84 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,41 @@ |
0 |
+/* |
|
1 |
+ * This file is part of FFmpeg. |
|
2 |
+ * |
|
3 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
4 |
+ * modify it under the terms of the GNU Lesser General Public |
|
5 |
+ * License as published by the Free Software Foundation; either |
|
6 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
7 |
+ * |
|
8 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
9 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
10 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
11 |
+ * Lesser General Public License for more details. |
|
12 |
+ * |
|
13 |
+ * You should have received a copy of the GNU Lesser General Public |
|
14 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
15 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
16 |
+ */ |
|
17 |
+ |
|
18 |
+#ifndef AVCODEC_LOSSLESS_VIDEOENCDSP_H |
|
19 |
+#define AVCODEC_LOSSLESS_VIDEOENCDSP_H |
|
20 |
+ |
|
21 |
+#include <stdint.h> |
|
22 |
+ |
|
23 |
+typedef struct LLVidEncDSPContext { |
|
24 |
+ void (*diff_bytes)(uint8_t *dst /* align 16 */, |
|
25 |
+ const uint8_t *src1 /* align 16 */, |
|
26 |
+ const uint8_t *src2 /* align 1 */, |
|
27 |
+ intptr_t w); |
|
28 |
+ /** |
|
29 |
+ * Subtract HuffYUV's variant of median prediction. |
|
30 |
+ * Note, this might read from src1[-1], src2[-1]. |
|
31 |
+ */ |
|
32 |
+ void (*sub_median_pred)(uint8_t *dst, const uint8_t *src1, |
|
33 |
+ const uint8_t *src2, intptr_t w, |
|
34 |
+ int *left, int *left_top); |
|
35 |
+} LLVidEncDSPContext; |
|
36 |
+ |
|
37 |
+void ff_llvidencdsp_init(LLVidEncDSPContext *c); |
|
38 |
+void ff_llvidencdsp_init_x86(LLVidEncDSPContext *c); |
|
39 |
+ |
|
40 |
+#endif /* AVCODEC_LOSSLESS_VIDEOENCDSP_H */ |
... | ... |
@@ -22,7 +22,7 @@ |
22 | 22 |
#include "avcodec.h" |
23 | 23 |
#include "internal.h" |
24 | 24 |
#include "bytestream.h" |
25 |
-#include "huffyuvencdsp.h" |
|
25 |
+#include "lossless_videoencdsp.h" |
|
26 | 26 |
#include "png.h" |
27 | 27 |
#include "apng.h" |
28 | 28 |
|
... | ... |
@@ -47,7 +47,7 @@ typedef struct APNGFctlChunk { |
47 | 47 |
|
48 | 48 |
typedef struct PNGEncContext { |
49 | 49 |
AVClass *class; |
50 |
- HuffYUVEncDSPContext hdsp; |
|
50 |
+ LLVidEncDSPContext llvidencdsp; |
|
51 | 51 |
|
52 | 52 |
uint8_t *bytestream; |
53 | 53 |
uint8_t *bytestream_start; |
... | ... |
@@ -159,7 +159,7 @@ static void sub_left_prediction(PNGEncContext *c, uint8_t *dst, const uint8_t *s |
159 | 159 |
for (x = 0; x < unaligned_w; x++) |
160 | 160 |
*dst++ = *src1++ - *src2++; |
161 | 161 |
size -= unaligned_w; |
162 |
- c->hdsp.diff_bytes(dst, src1, src2, size); |
|
162 |
+ c->llvidencdsp.diff_bytes(dst, src1, src2, size); |
|
163 | 163 |
} |
164 | 164 |
|
165 | 165 |
static void png_filter_row(PNGEncContext *c, uint8_t *dst, int filter_type, |
... | ... |
@@ -175,7 +175,7 @@ static void png_filter_row(PNGEncContext *c, uint8_t *dst, int filter_type, |
175 | 175 |
sub_left_prediction(c, dst, src, bpp, size); |
176 | 176 |
break; |
177 | 177 |
case PNG_FILTER_VALUE_UP: |
178 |
- c->hdsp.diff_bytes(dst, src, top, size); |
|
178 |
+ c->llvidencdsp.diff_bytes(dst, src, top, size); |
|
179 | 179 |
break; |
180 | 180 |
case PNG_FILTER_VALUE_AVG: |
181 | 181 |
for (i = 0; i < bpp; i++) |
... | ... |
@@ -1015,7 +1015,7 @@ FF_DISABLE_DEPRECATION_WARNINGS |
1015 | 1015 |
FF_ENABLE_DEPRECATION_WARNINGS |
1016 | 1016 |
#endif |
1017 | 1017 |
|
1018 |
- ff_huffyuvencdsp_init(&s->hdsp, avctx); |
|
1018 |
+ ff_llvidencdsp_init(&s->llvidencdsp); |
|
1019 | 1019 |
|
1020 | 1020 |
#if FF_API_PRIVATE_OPT |
1021 | 1021 |
FF_DISABLE_DEPRECATION_WARNINGS |
... | ... |
@@ -30,8 +30,8 @@ |
30 | 30 |
#include "libavutil/common.h" |
31 | 31 |
#include "avcodec.h" |
32 | 32 |
#include "bswapdsp.h" |
33 |
-#include "huffyuvencdsp.h" |
|
34 | 33 |
#include "lossless_videodsp.h" |
34 |
+#include "lossless_videoencdsp.h" |
|
35 | 35 |
|
36 | 36 |
enum { |
37 | 37 |
PRED_NONE = 0, |
... | ... |
@@ -70,8 +70,8 @@ typedef struct UtvideoContext { |
70 | 70 |
const AVClass *class; |
71 | 71 |
AVCodecContext *avctx; |
72 | 72 |
BswapDSPContext bdsp; |
73 |
- HuffYUVEncDSPContext hdsp; |
|
74 | 73 |
LLVidDSPContext llviddsp; |
74 |
+ LLVidEncDSPContext llvidencdsp; |
|
75 | 75 |
|
76 | 76 |
uint32_t frame_info_size, flags, frame_info; |
77 | 77 |
int planes; |
... | ... |
@@ -33,7 +33,6 @@ |
33 | 33 |
#include "bswapdsp.h" |
34 | 34 |
#include "bytestream.h" |
35 | 35 |
#include "put_bits.h" |
36 |
-#include "huffyuvencdsp.h" |
|
37 | 36 |
#include "mathops.h" |
38 | 37 |
#include "utvideo.h" |
39 | 38 |
#include "huffman.h" |
... | ... |
@@ -120,7 +119,7 @@ static av_cold int utvideo_encode_init(AVCodecContext *avctx) |
120 | 120 |
} |
121 | 121 |
|
122 | 122 |
ff_bswapdsp_init(&c->bdsp); |
123 |
- ff_huffyuvencdsp_init(&c->hdsp, avctx); |
|
123 |
+ ff_llvidencdsp_init(&c->llvidencdsp); |
|
124 | 124 |
|
125 | 125 |
#if FF_API_PRIVATE_OPT |
126 | 126 |
FF_DISABLE_DEPRECATION_WARNINGS |
... | ... |
@@ -324,7 +323,7 @@ static void median_predict(UtvideoContext *c, uint8_t *src, uint8_t *dst, int st |
324 | 324 |
|
325 | 325 |
/* Rest of the coded part uses median prediction */ |
326 | 326 |
for (j = 1; j < height; j++) { |
327 |
- c->hdsp.sub_hfyu_median_pred(dst, src - stride, src, width, &A, &B); |
|
327 |
+ c->llvidencdsp.sub_median_pred(dst, src - stride, src, width, &A, &B); |
|
328 | 328 |
dst += width; |
329 | 329 |
src += stride; |
330 | 330 |
} |
... | ... |
@@ -20,8 +20,9 @@ OBJS-$(CONFIG_H264QPEL) += x86/h264_qpel.o |
20 | 20 |
OBJS-$(CONFIG_HPELDSP) += x86/hpeldsp_init.o |
21 | 21 |
OBJS-$(CONFIG_LLAUDDSP) += x86/lossless_audiodsp_init.o |
22 | 22 |
OBJS-$(CONFIG_LLVIDDSP) += x86/lossless_videodsp_init.o |
23 |
+OBJS-$(CONFIG_LLVIDENCDSP) += x86/lossless_videoencdsp_init.o |
|
23 | 24 |
OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp_init.o |
24 |
-OBJS-$(CONFIG_HUFFYUVENCDSP) += x86/huffyuvencdsp_mmx.o |
|
25 |
+OBJS-$(CONFIG_HUFFYUVENCDSP) += x86/huffyuvencdsp_init.o |
|
25 | 26 |
OBJS-$(CONFIG_IDCTDSP) += x86/idctdsp_init.o |
26 | 27 |
OBJS-$(CONFIG_LPC) += x86/lpc.o |
27 | 28 |
OBJS-$(CONFIG_ME_CMP) += x86/me_cmp_init.o |
... | ... |
@@ -114,6 +115,7 @@ YASM-OBJS-$(CONFIG_HUFFYUVENCDSP) += x86/huffyuvencdsp.o |
114 | 114 |
YASM-OBJS-$(CONFIG_IDCTDSP) += x86/idctdsp.o |
115 | 115 |
YASM-OBJS-$(CONFIG_LLAUDDSP) += x86/lossless_audiodsp.o |
116 | 116 |
YASM-OBJS-$(CONFIG_LLVIDDSP) += x86/lossless_videodsp.o |
117 |
+YASM-OBJS-$(CONFIG_LLVIDENCDSP) += x86/lossless_videoencdsp.o |
|
117 | 118 |
YASM-OBJS-$(CONFIG_ME_CMP) += x86/me_cmp.o |
118 | 119 |
YASM-OBJS-$(CONFIG_MPEGAUDIODSP) += x86/imdct36.o |
119 | 120 |
YASM-OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoencdsp.o |
... | ... |
@@ -27,128 +27,8 @@ |
27 | 27 |
|
28 | 28 |
section .text |
29 | 29 |
|
30 |
-; void ff_diff_bytes(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, |
|
31 |
-; intptr_t w); |
|
32 |
-%macro DIFF_BYTES_PROLOGUE 0 |
|
33 |
-%if ARCH_X86_32 |
|
34 |
-cglobal diff_bytes, 3,5,2, dst, src1, src2 |
|
35 |
-%define wq r4q |
|
36 |
- DECLARE_REG_TMP 3 |
|
37 |
- mov wq, r3mp |
|
38 |
-%else |
|
39 |
-cglobal diff_bytes, 4,5,2, dst, src1, src2, w |
|
40 |
- DECLARE_REG_TMP 4 |
|
41 |
-%endif ; ARCH_X86_32 |
|
42 |
-%define i t0q |
|
43 |
-%endmacro |
|
44 |
- |
|
45 |
-; label to jump to if w < regsize |
|
46 |
-%macro DIFF_BYTES_LOOP_PREP 1 |
|
47 |
- mov i, wq |
|
48 |
- and i, -2 * regsize |
|
49 |
- jz %1 |
|
50 |
- add dstq, i |
|
51 |
- add src1q, i |
|
52 |
- add src2q, i |
|
53 |
- neg i |
|
54 |
-%endmacro |
|
55 |
- |
|
56 |
-; mov type used for src1q, dstq, first reg, second reg |
|
57 |
-%macro DIFF_BYTES_LOOP_CORE 4 |
|
58 |
-%if mmsize != 16 |
|
59 |
- mov%1 %3, [src1q + i] |
|
60 |
- mov%1 %4, [src1q + i + regsize] |
|
61 |
- psubb %3, [src2q + i] |
|
62 |
- psubb %4, [src2q + i + regsize] |
|
63 |
- mov%2 [dstq + i], %3 |
|
64 |
- mov%2 [regsize + dstq + i], %4 |
|
65 |
-%else |
|
66 |
- ; SSE enforces alignment of psubb operand |
|
67 |
- mov%1 %3, [src1q + i] |
|
68 |
- movu %4, [src2q + i] |
|
69 |
- psubb %3, %4 |
|
70 |
- mov%2 [dstq + i], %3 |
|
71 |
- mov%1 %3, [src1q + i + regsize] |
|
72 |
- movu %4, [src2q + i + regsize] |
|
73 |
- psubb %3, %4 |
|
74 |
- mov%2 [regsize + dstq + i], %3 |
|
75 |
-%endif |
|
76 |
-%endmacro |
|
77 |
- |
|
78 |
-%macro DIFF_BYTES_BODY 2 ; mov type used for src1q, for dstq |
|
79 |
- %define regsize mmsize |
|
80 |
-.loop_%1%2: |
|
81 |
- DIFF_BYTES_LOOP_CORE %1, %2, m0, m1 |
|
82 |
- add i, 2 * regsize |
|
83 |
- jl .loop_%1%2 |
|
84 |
-.skip_main_%1%2: |
|
85 |
- and wq, 2 * regsize - 1 |
|
86 |
- jz .end_%1%2 |
|
87 |
-%if mmsize > 16 |
|
88 |
- ; fall back to narrower xmm |
|
89 |
- %define regsize mmsize / 2 |
|
90 |
- DIFF_BYTES_LOOP_PREP .setup_loop_gpr_aa |
|
91 |
-.loop2_%1%2: |
|
92 |
- DIFF_BYTES_LOOP_CORE %1, %2, xm0, xm1 |
|
93 |
- add i, 2 * regsize |
|
94 |
- jl .loop2_%1%2 |
|
95 |
-.setup_loop_gpr_%1%2: |
|
96 |
- and wq, 2 * regsize - 1 |
|
97 |
- jz .end_%1%2 |
|
98 |
-%endif |
|
99 |
- add dstq, wq |
|
100 |
- add src1q, wq |
|
101 |
- add src2q, wq |
|
102 |
- neg wq |
|
103 |
-.loop_gpr_%1%2: |
|
104 |
- mov t0b, [src1q + wq] |
|
105 |
- sub t0b, [src2q + wq] |
|
106 |
- mov [dstq + wq], t0b |
|
107 |
- inc wq |
|
108 |
- jl .loop_gpr_%1%2 |
|
109 |
-.end_%1%2: |
|
110 |
- REP_RET |
|
111 |
-%endmacro |
|
112 |
- |
|
113 |
-%if ARCH_X86_32 |
|
114 |
-INIT_MMX mmx |
|
115 |
-DIFF_BYTES_PROLOGUE |
|
116 |
- %define regsize mmsize |
|
117 |
- DIFF_BYTES_LOOP_PREP .skip_main_aa |
|
118 |
- DIFF_BYTES_BODY a, a |
|
119 |
-%undef i |
|
120 |
-%endif |
|
121 |
- |
|
122 |
-INIT_XMM sse2 |
|
123 |
-DIFF_BYTES_PROLOGUE |
|
124 |
- %define regsize mmsize |
|
125 |
- DIFF_BYTES_LOOP_PREP .skip_main_aa |
|
126 |
- test dstq, regsize - 1 |
|
127 |
- jnz .loop_uu |
|
128 |
- test src1q, regsize - 1 |
|
129 |
- jnz .loop_ua |
|
130 |
- DIFF_BYTES_BODY a, a |
|
131 |
- DIFF_BYTES_BODY u, a |
|
132 |
- DIFF_BYTES_BODY u, u |
|
133 |
-%undef i |
|
134 |
- |
|
135 |
-%if HAVE_AVX2_EXTERNAL |
|
136 |
-INIT_YMM avx2 |
|
137 |
-DIFF_BYTES_PROLOGUE |
|
138 |
- %define regsize mmsize |
|
139 |
- ; Directly using unaligned SSE2 version is marginally faster than |
|
140 |
- ; branching based on arguments. |
|
141 |
- DIFF_BYTES_LOOP_PREP .skip_main_uu |
|
142 |
- test dstq, regsize - 1 |
|
143 |
- jnz .loop_uu |
|
144 |
- test src1q, regsize - 1 |
|
145 |
- jnz .loop_ua |
|
146 |
- DIFF_BYTES_BODY a, a |
|
147 |
- DIFF_BYTES_BODY u, a |
|
148 |
- DIFF_BYTES_BODY u, u |
|
149 |
-%undef i |
|
150 |
-%endif |
|
151 |
- |
|
30 |
+; void ff_diff_int16(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, |
|
31 |
+; unsigned mask, int w); |
|
152 | 32 |
%macro INT16_LOOP 2 ; %1 = a/u (aligned/unaligned), %2 = add/sub |
153 | 33 |
movd m4, maskd |
154 | 34 |
SPLATW m4, m4 |
155 | 35 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,54 @@ |
0 |
+/* |
|
1 |
+ * SIMD-optimized HuffYUV encoding functions |
|
2 |
+ * Copyright (c) 2000, 2001 Fabrice Bellard |
|
3 |
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> |
|
4 |
+ * |
|
5 |
+ * MMX optimization by Nick Kurshev <nickols_k@mail.ru> |
|
6 |
+ * |
|
7 |
+ * This file is part of FFmpeg. |
|
8 |
+ * |
|
9 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
10 |
+ * modify it under the terms of the GNU Lesser General Public |
|
11 |
+ * License as published by the Free Software Foundation; either |
|
12 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
13 |
+ * |
|
14 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
15 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
16 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
17 |
+ * Lesser General Public License for more details. |
|
18 |
+ * |
|
19 |
+ * You should have received a copy of the GNU Lesser General Public |
|
20 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
21 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
22 |
+ */ |
|
23 |
+ |
|
24 |
+#include "libavutil/attributes.h" |
|
25 |
+#include "libavutil/cpu.h" |
|
26 |
+#include "libavutil/pixdesc.h" |
|
27 |
+#include "libavutil/x86/cpu.h" |
|
28 |
+#include "libavcodec/huffyuvencdsp.h" |
|
29 |
+ |
|
30 |
+void ff_diff_int16_mmx (uint16_t *dst, const uint16_t *src1, const uint16_t *src2, |
|
31 |
+ unsigned mask, int w); |
|
32 |
+void ff_diff_int16_sse2(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, |
|
33 |
+ unsigned mask, int w); |
|
34 |
+void ff_sub_hfyu_median_pred_int16_mmxext(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, |
|
35 |
+ unsigned mask, int w, int *left, int *left_top); |
|
36 |
+ |
|
37 |
+av_cold void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c, AVCodecContext *avctx) |
|
38 |
+{ |
|
39 |
+ av_unused int cpu_flags = av_get_cpu_flags(); |
|
40 |
+ const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(avctx->pix_fmt); |
|
41 |
+ |
|
42 |
+ if (ARCH_X86_32 && EXTERNAL_MMX(cpu_flags)) { |
|
43 |
+ c->diff_int16 = ff_diff_int16_mmx; |
|
44 |
+ } |
|
45 |
+ |
|
46 |
+ if (EXTERNAL_MMXEXT(cpu_flags) && pix_desc && pix_desc->comp[0].depth<16) { |
|
47 |
+ c->sub_hfyu_median_pred_int16 = ff_sub_hfyu_median_pred_int16_mmxext; |
|
48 |
+ } |
|
49 |
+ |
|
50 |
+ if (EXTERNAL_SSE2(cpu_flags)) { |
|
51 |
+ c->diff_int16 = ff_diff_int16_sse2; |
|
52 |
+ } |
|
53 |
+} |
0 | 54 |
deleted file mode 100644 |
... | ... |
@@ -1,118 +0,0 @@ |
1 |
-/* |
|
2 |
- * SIMD-optimized HuffYUV encoding functions |
|
3 |
- * Copyright (c) 2000, 2001 Fabrice Bellard |
|
4 |
- * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> |
|
5 |
- * |
|
6 |
- * MMX optimization by Nick Kurshev <nickols_k@mail.ru> |
|
7 |
- * |
|
8 |
- * This file is part of FFmpeg. |
|
9 |
- * |
|
10 |
- * FFmpeg is free software; you can redistribute it and/or |
|
11 |
- * modify it under the terms of the GNU Lesser General Public |
|
12 |
- * License as published by the Free Software Foundation; either |
|
13 |
- * version 2.1 of the License, or (at your option) any later version. |
|
14 |
- * |
|
15 |
- * FFmpeg is distributed in the hope that it will be useful, |
|
16 |
- * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
17 |
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
18 |
- * Lesser General Public License for more details. |
|
19 |
- * |
|
20 |
- * You should have received a copy of the GNU Lesser General Public |
|
21 |
- * License along with FFmpeg; if not, write to the Free Software |
|
22 |
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
23 |
- */ |
|
24 |
- |
|
25 |
-#include "libavutil/attributes.h" |
|
26 |
-#include "libavutil/cpu.h" |
|
27 |
-#include "libavutil/pixdesc.h" |
|
28 |
-#include "libavutil/x86/asm.h" |
|
29 |
-#include "libavutil/x86/cpu.h" |
|
30 |
-#include "libavcodec/huffyuvencdsp.h" |
|
31 |
-#include "libavcodec/mathops.h" |
|
32 |
- |
|
33 |
-void ff_diff_bytes_mmx(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, |
|
34 |
- intptr_t w); |
|
35 |
-void ff_diff_bytes_sse2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, |
|
36 |
- intptr_t w); |
|
37 |
-void ff_diff_bytes_avx2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, |
|
38 |
- intptr_t w); |
|
39 |
-void ff_diff_int16_mmx (uint16_t *dst, const uint16_t *src1, const uint16_t *src2, |
|
40 |
- unsigned mask, int w); |
|
41 |
-void ff_diff_int16_sse2(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, |
|
42 |
- unsigned mask, int w); |
|
43 |
-void ff_sub_hfyu_median_pred_int16_mmxext(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, |
|
44 |
- unsigned mask, int w, int *left, int *left_top); |
|
45 |
- |
|
46 |
-#if HAVE_INLINE_ASM |
|
47 |
- |
|
48 |
-static void sub_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *src1, |
|
49 |
- const uint8_t *src2, intptr_t w, |
|
50 |
- int *left, int *left_top) |
|
51 |
-{ |
|
52 |
- x86_reg i = 0; |
|
53 |
- uint8_t l, lt; |
|
54 |
- |
|
55 |
- __asm__ volatile ( |
|
56 |
- "movq (%1, %0), %%mm0 \n\t" // LT |
|
57 |
- "psllq $8, %%mm0 \n\t" |
|
58 |
- "1: \n\t" |
|
59 |
- "movq (%1, %0), %%mm1 \n\t" // T |
|
60 |
- "movq -1(%2, %0), %%mm2 \n\t" // L |
|
61 |
- "movq (%2, %0), %%mm3 \n\t" // X |
|
62 |
- "movq %%mm2, %%mm4 \n\t" // L |
|
63 |
- "psubb %%mm0, %%mm2 \n\t" |
|
64 |
- "paddb %%mm1, %%mm2 \n\t" // L + T - LT |
|
65 |
- "movq %%mm4, %%mm5 \n\t" // L |
|
66 |
- "pmaxub %%mm1, %%mm4 \n\t" // max(T, L) |
|
67 |
- "pminub %%mm5, %%mm1 \n\t" // min(T, L) |
|
68 |
- "pminub %%mm2, %%mm4 \n\t" |
|
69 |
- "pmaxub %%mm1, %%mm4 \n\t" |
|
70 |
- "psubb %%mm4, %%mm3 \n\t" // dst - pred |
|
71 |
- "movq %%mm3, (%3, %0) \n\t" |
|
72 |
- "add $8, %0 \n\t" |
|
73 |
- "movq -1(%1, %0), %%mm0 \n\t" // LT |
|
74 |
- "cmp %4, %0 \n\t" |
|
75 |
- " jb 1b \n\t" |
|
76 |
- : "+r" (i) |
|
77 |
- : "r" (src1), "r" (src2), "r" (dst), "r" ((x86_reg) w)); |
|
78 |
- |
|
79 |
- l = *left; |
|
80 |
- lt = *left_top; |
|
81 |
- |
|
82 |
- dst[0] = src2[0] - mid_pred(l, src1[0], (l + src1[0] - lt) & 0xFF); |
|
83 |
- |
|
84 |
- *left_top = src1[w - 1]; |
|
85 |
- *left = src2[w - 1]; |
|
86 |
-} |
|
87 |
- |
|
88 |
-#endif /* HAVE_INLINE_ASM */ |
|
89 |
- |
|
90 |
-av_cold void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c, AVCodecContext *avctx) |
|
91 |
-{ |
|
92 |
- av_unused int cpu_flags = av_get_cpu_flags(); |
|
93 |
- const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(avctx->pix_fmt); |
|
94 |
- |
|
95 |
- if (ARCH_X86_32 && EXTERNAL_MMX(cpu_flags)) { |
|
96 |
- c->diff_bytes = ff_diff_bytes_mmx; |
|
97 |
- c->diff_int16 = ff_diff_int16_mmx; |
|
98 |
- } |
|
99 |
- |
|
100 |
-#if HAVE_INLINE_ASM |
|
101 |
- if (INLINE_MMXEXT(cpu_flags)) { |
|
102 |
- c->sub_hfyu_median_pred = sub_hfyu_median_pred_mmxext; |
|
103 |
- } |
|
104 |
-#endif /* HAVE_INLINE_ASM */ |
|
105 |
- |
|
106 |
- if (EXTERNAL_MMXEXT(cpu_flags) && pix_desc && pix_desc->comp[0].depth<16) { |
|
107 |
- c->sub_hfyu_median_pred_int16 = ff_sub_hfyu_median_pred_int16_mmxext; |
|
108 |
- } |
|
109 |
- |
|
110 |
- if (EXTERNAL_SSE2(cpu_flags)) { |
|
111 |
- c->diff_bytes = ff_diff_bytes_sse2; |
|
112 |
- c->diff_int16 = ff_diff_int16_sse2; |
|
113 |
- } |
|
114 |
- |
|
115 |
- if (EXTERNAL_AVX2_FAST(cpu_flags)) { |
|
116 |
- c->diff_bytes = ff_diff_bytes_avx2; |
|
117 |
- } |
|
118 |
-} |
119 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,150 @@ |
0 |
+;************************************************************************ |
|
1 |
+;* SIMD-optimized lossless video encoding functions |
|
2 |
+;* Copyright (c) 2000, 2001 Fabrice Bellard |
|
3 |
+;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> |
|
4 |
+;* |
|
5 |
+;* MMX optimization by Nick Kurshev <nickols_k@mail.ru> |
|
6 |
+;* Conversion to NASM format by Tiancheng "Timothy" Gu <timothygu99@gmail.com> |
|
7 |
+;* |
|
8 |
+;* This file is part of FFmpeg. |
|
9 |
+;* |
|
10 |
+;* FFmpeg is free software; you can redistribute it and/or |
|
11 |
+;* modify it under the terms of the GNU Lesser General Public |
|
12 |
+;* License as published by the Free Software Foundation; either |
|
13 |
+;* version 2.1 of the License, or (at your option) any later version. |
|
14 |
+;* |
|
15 |
+;* FFmpeg is distributed in the hope that it will be useful, |
|
16 |
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
17 |
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
18 |
+;* Lesser General Public License for more details. |
|
19 |
+;* |
|
20 |
+;* You should have received a copy of the GNU Lesser General Public |
|
21 |
+;* License along with FFmpeg; if not, write to the Free Software |
|
22 |
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
23 |
+;****************************************************************************** |
|
24 |
+ |
|
25 |
+%include "libavutil/x86/x86util.asm" |
|
26 |
+ |
|
27 |
+section .text |
|
28 |
+ |
|
29 |
+; void ff_diff_bytes(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, |
|
30 |
+; intptr_t w); |
|
31 |
+%macro DIFF_BYTES_PROLOGUE 0 |
|
32 |
+%if ARCH_X86_32 |
|
33 |
+cglobal diff_bytes, 3,5,2, dst, src1, src2 |
|
34 |
+%define wq r4q |
|
35 |
+ DECLARE_REG_TMP 3 |
|
36 |
+ mov wq, r3mp |
|
37 |
+%else |
|
38 |
+cglobal diff_bytes, 4,5,2, dst, src1, src2, w |
|
39 |
+ DECLARE_REG_TMP 4 |
|
40 |
+%endif ; ARCH_X86_32 |
|
41 |
+%define i t0q |
|
42 |
+%endmacro |
|
43 |
+ |
|
44 |
+; label to jump to if w < regsize |
|
45 |
+%macro DIFF_BYTES_LOOP_PREP 1 |
|
46 |
+ mov i, wq |
|
47 |
+ and i, -2 * regsize |
|
48 |
+ jz %1 |
|
49 |
+ add dstq, i |
|
50 |
+ add src1q, i |
|
51 |
+ add src2q, i |
|
52 |
+ neg i |
|
53 |
+%endmacro |
|
54 |
+ |
|
55 |
+; mov type used for src1q, dstq, first reg, second reg |
|
56 |
+%macro DIFF_BYTES_LOOP_CORE 4 |
|
57 |
+%if mmsize != 16 |
|
58 |
+ mov%1 %3, [src1q + i] |
|
59 |
+ mov%1 %4, [src1q + i + regsize] |
|
60 |
+ psubb %3, [src2q + i] |
|
61 |
+ psubb %4, [src2q + i + regsize] |
|
62 |
+ mov%2 [dstq + i], %3 |
|
63 |
+ mov%2 [regsize + dstq + i], %4 |
|
64 |
+%else |
|
65 |
+ ; SSE enforces alignment of psubb operand |
|
66 |
+ mov%1 %3, [src1q + i] |
|
67 |
+ movu %4, [src2q + i] |
|
68 |
+ psubb %3, %4 |
|
69 |
+ mov%2 [dstq + i], %3 |
|
70 |
+ mov%1 %3, [src1q + i + regsize] |
|
71 |
+ movu %4, [src2q + i + regsize] |
|
72 |
+ psubb %3, %4 |
|
73 |
+ mov%2 [regsize + dstq + i], %3 |
|
74 |
+%endif |
|
75 |
+%endmacro |
|
76 |
+ |
|
77 |
+%macro DIFF_BYTES_BODY 2 ; mov type used for src1q, for dstq |
|
78 |
+ %define regsize mmsize |
|
79 |
+.loop_%1%2: |
|
80 |
+ DIFF_BYTES_LOOP_CORE %1, %2, m0, m1 |
|
81 |
+ add i, 2 * regsize |
|
82 |
+ jl .loop_%1%2 |
|
83 |
+.skip_main_%1%2: |
|
84 |
+ and wq, 2 * regsize - 1 |
|
85 |
+ jz .end_%1%2 |
|
86 |
+%if mmsize > 16 |
|
87 |
+ ; fall back to narrower xmm |
|
88 |
+ %define regsize mmsize / 2 |
|
89 |
+ DIFF_BYTES_LOOP_PREP .setup_loop_gpr_aa |
|
90 |
+.loop2_%1%2: |
|
91 |
+ DIFF_BYTES_LOOP_CORE %1, %2, xm0, xm1 |
|
92 |
+ add i, 2 * regsize |
|
93 |
+ jl .loop2_%1%2 |
|
94 |
+.setup_loop_gpr_%1%2: |
|
95 |
+ and wq, 2 * regsize - 1 |
|
96 |
+ jz .end_%1%2 |
|
97 |
+%endif |
|
98 |
+ add dstq, wq |
|
99 |
+ add src1q, wq |
|
100 |
+ add src2q, wq |
|
101 |
+ neg wq |
|
102 |
+.loop_gpr_%1%2: |
|
103 |
+ mov t0b, [src1q + wq] |
|
104 |
+ sub t0b, [src2q + wq] |
|
105 |
+ mov [dstq + wq], t0b |
|
106 |
+ inc wq |
|
107 |
+ jl .loop_gpr_%1%2 |
|
108 |
+.end_%1%2: |
|
109 |
+ REP_RET |
|
110 |
+%endmacro |
|
111 |
+ |
|
112 |
+%if ARCH_X86_32 |
|
113 |
+INIT_MMX mmx |
|
114 |
+DIFF_BYTES_PROLOGUE |
|
115 |
+ %define regsize mmsize |
|
116 |
+ DIFF_BYTES_LOOP_PREP .skip_main_aa |
|
117 |
+ DIFF_BYTES_BODY a, a |
|
118 |
+%undef i |
|
119 |
+%endif |
|
120 |
+ |
|
121 |
+INIT_XMM sse2 |
|
122 |
+DIFF_BYTES_PROLOGUE |
|
123 |
+ %define regsize mmsize |
|
124 |
+ DIFF_BYTES_LOOP_PREP .skip_main_aa |
|
125 |
+ test dstq, regsize - 1 |
|
126 |
+ jnz .loop_uu |
|
127 |
+ test src1q, regsize - 1 |
|
128 |
+ jnz .loop_ua |
|
129 |
+ DIFF_BYTES_BODY a, a |
|
130 |
+ DIFF_BYTES_BODY u, a |
|
131 |
+ DIFF_BYTES_BODY u, u |
|
132 |
+%undef i |
|
133 |
+ |
|
134 |
+%if HAVE_AVX2_EXTERNAL |
|
135 |
+INIT_YMM avx2 |
|
136 |
+DIFF_BYTES_PROLOGUE |
|
137 |
+ %define regsize mmsize |
|
138 |
+ ; Directly using unaligned SSE2 version is marginally faster than |
|
139 |
+ ; branching based on arguments. |
|
140 |
+ DIFF_BYTES_LOOP_PREP .skip_main_uu |
|
141 |
+ test dstq, regsize - 1 |
|
142 |
+ jnz .loop_uu |
|
143 |
+ test src1q, regsize - 1 |
|
144 |
+ jnz .loop_ua |
|
145 |
+ DIFF_BYTES_BODY a, a |
|
146 |
+ DIFF_BYTES_BODY u, a |
|
147 |
+ DIFF_BYTES_BODY u, u |
|
148 |
+%undef i |
|
149 |
+%endif |
0 | 150 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,104 @@ |
0 |
+/* |
|
1 |
+ * SIMD-optimized lossless video encoding functions |
|
2 |
+ * Copyright (c) 2000, 2001 Fabrice Bellard |
|
3 |
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> |
|
4 |
+ * |
|
5 |
+ * MMX optimization by Nick Kurshev <nickols_k@mail.ru> |
|
6 |
+ * |
|
7 |
+ * This file is part of FFmpeg. |
|
8 |
+ * |
|
9 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
10 |
+ * modify it under the terms of the GNU Lesser General Public |
|
11 |
+ * License as published by the Free Software Foundation; either |
|
12 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
13 |
+ * |
|
14 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
15 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
16 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
17 |
+ * Lesser General Public License for more details. |
|
18 |
+ * |
|
19 |
+ * You should have received a copy of the GNU Lesser General Public |
|
20 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
21 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
22 |
+ */ |
|
23 |
+ |
|
24 |
+#include "libavutil/attributes.h" |
|
25 |
+#include "libavutil/cpu.h" |
|
26 |
+#include "libavutil/x86/asm.h" |
|
27 |
+#include "libavutil/x86/cpu.h" |
|
28 |
+#include "libavcodec/lossless_videoencdsp.h" |
|
29 |
+#include "libavcodec/mathops.h" |
|
30 |
+ |
|
31 |
+void ff_diff_bytes_mmx(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, |
|
32 |
+ intptr_t w); |
|
33 |
+void ff_diff_bytes_sse2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, |
|
34 |
+ intptr_t w); |
|
35 |
+void ff_diff_bytes_avx2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, |
|
36 |
+ intptr_t w); |
|
37 |
+ |
|
38 |
+#if HAVE_INLINE_ASM |
|
39 |
+ |
|
40 |
+static void sub_median_pred_mmxext(uint8_t *dst, const uint8_t *src1, |
|
41 |
+ const uint8_t *src2, intptr_t w, |
|
42 |
+ int *left, int *left_top) |
|
43 |
+{ |
|
44 |
+ x86_reg i = 0; |
|
45 |
+ uint8_t l, lt; |
|
46 |
+ |
|
47 |
+ __asm__ volatile ( |
|
48 |
+ "movq (%1, %0), %%mm0 \n\t" // LT |
|
49 |
+ "psllq $8, %%mm0 \n\t" |
|
50 |
+ "1: \n\t" |
|
51 |
+ "movq (%1, %0), %%mm1 \n\t" // T |
|
52 |
+ "movq -1(%2, %0), %%mm2 \n\t" // L |
|
53 |
+ "movq (%2, %0), %%mm3 \n\t" // X |
|
54 |
+ "movq %%mm2, %%mm4 \n\t" // L |
|
55 |
+ "psubb %%mm0, %%mm2 \n\t" |
|
56 |
+ "paddb %%mm1, %%mm2 \n\t" // L + T - LT |
|
57 |
+ "movq %%mm4, %%mm5 \n\t" // L |
|
58 |
+ "pmaxub %%mm1, %%mm4 \n\t" // max(T, L) |
|
59 |
+ "pminub %%mm5, %%mm1 \n\t" // min(T, L) |
|
60 |
+ "pminub %%mm2, %%mm4 \n\t" |
|
61 |
+ "pmaxub %%mm1, %%mm4 \n\t" |
|
62 |
+ "psubb %%mm4, %%mm3 \n\t" // dst - pred |
|
63 |
+ "movq %%mm3, (%3, %0) \n\t" |
|
64 |
+ "add $8, %0 \n\t" |
|
65 |
+ "movq -1(%1, %0), %%mm0 \n\t" // LT |
|
66 |
+ "cmp %4, %0 \n\t" |
|
67 |
+ " jb 1b \n\t" |
|
68 |
+ : "+r" (i) |
|
69 |
+ : "r" (src1), "r" (src2), "r" (dst), "r" ((x86_reg) w)); |
|
70 |
+ |
|
71 |
+ l = *left; |
|
72 |
+ lt = *left_top; |
|
73 |
+ |
|
74 |
+ dst[0] = src2[0] - mid_pred(l, src1[0], (l + src1[0] - lt) & 0xFF); |
|
75 |
+ |
|
76 |
+ *left_top = src1[w - 1]; |
|
77 |
+ *left = src2[w - 1]; |
|
78 |
+} |
|
79 |
+ |
|
80 |
+#endif /* HAVE_INLINE_ASM */ |
|
81 |
+ |
|
82 |
+av_cold void ff_llvidencdsp_init_x86(LLVidEncDSPContext *c) |
|
83 |
+{ |
|
84 |
+ av_unused int cpu_flags = av_get_cpu_flags(); |
|
85 |
+ |
|
86 |
+ if (ARCH_X86_32 && EXTERNAL_MMX(cpu_flags)) { |
|
87 |
+ c->diff_bytes = ff_diff_bytes_mmx; |
|
88 |
+ } |
|
89 |
+ |
|
90 |
+#if HAVE_INLINE_ASM |
|
91 |
+ if (INLINE_MMXEXT(cpu_flags)) { |
|
92 |
+ c->sub_median_pred = sub_median_pred_mmxext; |
|
93 |
+ } |
|
94 |
+#endif /* HAVE_INLINE_ASM */ |
|
95 |
+ |
|
96 |
+ if (EXTERNAL_SSE2(cpu_flags)) { |
|
97 |
+ c->diff_bytes = ff_diff_bytes_sse2; |
|
98 |
+ } |
|
99 |
+ |
|
100 |
+ if (EXTERNAL_AVX2_FAST(cpu_flags)) { |
|
101 |
+ c->diff_bytes = ff_diff_bytes_avx2; |
|
102 |
+ } |
|
103 |
+} |