... | ... |
@@ -1551,6 +1551,7 @@ CONFIG_EXTRA=" |
1551 | 1551 |
mpegvideo |
1552 | 1552 |
mpegvideoenc |
1553 | 1553 |
nettle |
1554 |
+ qpeldsp |
|
1554 | 1555 |
rangecoder |
1555 | 1556 |
riffdec |
1556 | 1557 |
riffenc |
... | ... |
@@ -1704,7 +1705,7 @@ rdft_select="fft" |
1704 | 1704 |
mpegaudio_select="mpegaudiodsp" |
1705 | 1705 |
mpegaudiodsp_select="dct" |
1706 | 1706 |
mpegvideo_select="dsputil hpeldsp videodsp" |
1707 |
-mpegvideoenc_select="dsputil mpegvideo" |
|
1707 |
+mpegvideoenc_select="dsputil mpegvideo qpeldsp" |
|
1708 | 1708 |
|
1709 | 1709 |
# decoders / encoders |
1710 | 1710 |
aac_decoder_select="mdct sinewin" |
... | ... |
@@ -1730,7 +1731,7 @@ atrac3p_decoder_select="mdct sinewin" |
1730 | 1730 |
bink_decoder_select="dsputil hpeldsp" |
1731 | 1731 |
binkaudio_dct_decoder_select="mdct rdft dct sinewin" |
1732 | 1732 |
binkaudio_rdft_decoder_select="mdct rdft sinewin" |
1733 |
-cavs_decoder_select="dsputil golomb h264chroma videodsp" |
|
1733 |
+cavs_decoder_select="dsputil golomb h264chroma qpeldsp videodsp" |
|
1734 | 1734 |
cllc_decoder_select="dsputil" |
1735 | 1735 |
comfortnoise_encoder_select="lpc" |
1736 | 1736 |
cook_decoder_select="dsputil mdct sinewin" |
... | ... |
@@ -1766,7 +1767,7 @@ g2m_decoder_deps="zlib" |
1766 | 1766 |
g2m_decoder_select="dsputil" |
1767 | 1767 |
h261_decoder_select="error_resilience mpegvideo" |
1768 | 1768 |
h261_encoder_select="aandcttables mpegvideoenc" |
1769 |
-h263_decoder_select="error_resilience h263_parser h263dsp mpegvideo" |
|
1769 |
+h263_decoder_select="error_resilience h263_parser h263dsp mpegvideo qpeldsp" |
|
1770 | 1770 |
h263_encoder_select="aandcttables h263dsp mpegvideoenc" |
1771 | 1771 |
h263i_decoder_select="h263_decoder" |
1772 | 1772 |
h263p_encoder_select="h263_encoder" |
... | ... |
@@ -1818,7 +1819,7 @@ msmpeg4v2_decoder_select="h263_decoder" |
1818 | 1818 |
msmpeg4v2_encoder_select="h263_encoder" |
1819 | 1819 |
msmpeg4v3_decoder_select="h263_decoder" |
1820 | 1820 |
msmpeg4v3_encoder_select="h263_encoder" |
1821 |
-mss2_decoder_select="error_resilience vc1_decoder" |
|
1821 |
+mss2_decoder_select="error_resilience qpeldsp vc1_decoder" |
|
1822 | 1822 |
mxpeg_decoder_select="mjpeg_decoder" |
1823 | 1823 |
nellymoser_decoder_select="mdct sinewin" |
1824 | 1824 |
nellymoser_encoder_select="audio_frame_queue mdct sinewin" |
... | ... |
@@ -1860,7 +1861,7 @@ twinvq_decoder_select="mdct lsp sinewin" |
1860 | 1860 |
utvideo_decoder_select="dsputil" |
1861 | 1861 |
utvideo_encoder_select="dsputil huffman huffyuvencdsp" |
1862 | 1862 |
vble_decoder_select="huffyuvdsp" |
1863 |
-vc1_decoder_select="error_resilience h263_decoder h264chroma h264qpel intrax8" |
|
1863 |
+vc1_decoder_select="error_resilience h263_decoder h264chroma h264qpel intrax8 qpeldsp" |
|
1864 | 1864 |
vc1image_decoder_select="vc1_decoder" |
1865 | 1865 |
vorbis_decoder_select="mdct" |
1866 | 1866 |
vorbis_encoder_select="mdct" |
... | ... |
@@ -1936,8 +1937,8 @@ wmv3_vdpau_hwaccel_select="vc1_vdpau_hwaccel" |
1936 | 1936 |
|
1937 | 1937 |
# parsers |
1938 | 1938 |
h264_parser_select="h264_decoder" |
1939 |
-mpeg4video_parser_select="error_resilience h263dsp mpegvideo" |
|
1940 | 1939 |
mpegvideo_parser_select="error_resilience mpegvideo" |
1940 |
+mpeg4video_parser_select="error_resilience h263dsp mpegvideo qpeldsp" |
|
1941 | 1941 |
vc1_parser_select="mpegvideo" |
1942 | 1942 |
|
1943 | 1943 |
# external libraries |
... | ... |
@@ -63,6 +63,7 @@ OBJS-$(CONFIG_MPEGVIDEO) += mpegvideo.o mpegvideo_motion.o \ |
63 | 63 |
mpegutils.o |
64 | 64 |
OBJS-$(CONFIG_MPEGVIDEOENC) += mpegvideo_enc.o mpeg12data.o \ |
65 | 65 |
motion_est.o ratecontrol.o |
66 |
+OBJS-$(CONFIG_QPELDSP) += qpeldsp.o |
|
66 | 67 |
OBJS-$(CONFIG_RANGECODER) += rangecoder.o |
67 | 68 |
RDFT-OBJS-$(CONFIG_HARDCODED_TABLES) += sin_tables.o |
68 | 69 |
OBJS-$(CONFIG_RDFT) += rdft.o $(RDFT-OBJS-yes) |
... | ... |
@@ -48,9 +48,6 @@ uint32_t ff_square_tab[512] = { 0, }; |
48 | 48 |
#undef BIT_DEPTH |
49 | 49 |
|
50 | 50 |
#define BIT_DEPTH 8 |
51 |
-#include "hpel_template.c" |
|
52 |
-#include "tpel_template.c" |
|
53 |
-#include "dsputil_template.c" |
|
54 | 51 |
#include "dsputilenc_template.c" |
55 | 52 |
|
56 | 53 |
/* Input permutation for the simple_idct_mmx */ |
... | ... |
@@ -485,701 +482,6 @@ void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, |
485 | 485 |
} |
486 | 486 |
} |
487 | 487 |
|
488 |
-#define QPEL_MC(r, OPNAME, RND, OP) \ |
|
489 |
-static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, \ |
|
490 |
- int dstStride, int srcStride, \ |
|
491 |
- int h) \ |
|
492 |
-{ \ |
|
493 |
- const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ |
|
494 |
- int i; \ |
|
495 |
- \ |
|
496 |
- for (i = 0; i < h; i++) { \ |
|
497 |
- OP(dst[0], (src[0] + src[1]) * 20 - (src[0] + src[2]) * 6 + (src[1] + src[3]) * 3 - (src[2] + src[4])); \ |
|
498 |
- OP(dst[1], (src[1] + src[2]) * 20 - (src[0] + src[3]) * 6 + (src[0] + src[4]) * 3 - (src[1] + src[5])); \ |
|
499 |
- OP(dst[2], (src[2] + src[3]) * 20 - (src[1] + src[4]) * 6 + (src[0] + src[5]) * 3 - (src[0] + src[6])); \ |
|
500 |
- OP(dst[3], (src[3] + src[4]) * 20 - (src[2] + src[5]) * 6 + (src[1] + src[6]) * 3 - (src[0] + src[7])); \ |
|
501 |
- OP(dst[4], (src[4] + src[5]) * 20 - (src[3] + src[6]) * 6 + (src[2] + src[7]) * 3 - (src[1] + src[8])); \ |
|
502 |
- OP(dst[5], (src[5] + src[6]) * 20 - (src[4] + src[7]) * 6 + (src[3] + src[8]) * 3 - (src[2] + src[8])); \ |
|
503 |
- OP(dst[6], (src[6] + src[7]) * 20 - (src[5] + src[8]) * 6 + (src[4] + src[8]) * 3 - (src[3] + src[7])); \ |
|
504 |
- OP(dst[7], (src[7] + src[8]) * 20 - (src[6] + src[8]) * 6 + (src[5] + src[7]) * 3 - (src[4] + src[6])); \ |
|
505 |
- dst += dstStride; \ |
|
506 |
- src += srcStride; \ |
|
507 |
- } \ |
|
508 |
-} \ |
|
509 |
- \ |
|
510 |
-static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, \ |
|
511 |
- int dstStride, int srcStride) \ |
|
512 |
-{ \ |
|
513 |
- const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ |
|
514 |
- const int w = 8; \ |
|
515 |
- int i; \ |
|
516 |
- \ |
|
517 |
- for (i = 0; i < w; i++) { \ |
|
518 |
- const int src0 = src[0 * srcStride]; \ |
|
519 |
- const int src1 = src[1 * srcStride]; \ |
|
520 |
- const int src2 = src[2 * srcStride]; \ |
|
521 |
- const int src3 = src[3 * srcStride]; \ |
|
522 |
- const int src4 = src[4 * srcStride]; \ |
|
523 |
- const int src5 = src[5 * srcStride]; \ |
|
524 |
- const int src6 = src[6 * srcStride]; \ |
|
525 |
- const int src7 = src[7 * srcStride]; \ |
|
526 |
- const int src8 = src[8 * srcStride]; \ |
|
527 |
- OP(dst[0 * dstStride], (src0 + src1) * 20 - (src0 + src2) * 6 + (src1 + src3) * 3 - (src2 + src4)); \ |
|
528 |
- OP(dst[1 * dstStride], (src1 + src2) * 20 - (src0 + src3) * 6 + (src0 + src4) * 3 - (src1 + src5)); \ |
|
529 |
- OP(dst[2 * dstStride], (src2 + src3) * 20 - (src1 + src4) * 6 + (src0 + src5) * 3 - (src0 + src6)); \ |
|
530 |
- OP(dst[3 * dstStride], (src3 + src4) * 20 - (src2 + src5) * 6 + (src1 + src6) * 3 - (src0 + src7)); \ |
|
531 |
- OP(dst[4 * dstStride], (src4 + src5) * 20 - (src3 + src6) * 6 + (src2 + src7) * 3 - (src1 + src8)); \ |
|
532 |
- OP(dst[5 * dstStride], (src5 + src6) * 20 - (src4 + src7) * 6 + (src3 + src8) * 3 - (src2 + src8)); \ |
|
533 |
- OP(dst[6 * dstStride], (src6 + src7) * 20 - (src5 + src8) * 6 + (src4 + src8) * 3 - (src3 + src7)); \ |
|
534 |
- OP(dst[7 * dstStride], (src7 + src8) * 20 - (src6 + src8) * 6 + (src5 + src7) * 3 - (src4 + src6)); \ |
|
535 |
- dst++; \ |
|
536 |
- src++; \ |
|
537 |
- } \ |
|
538 |
-} \ |
|
539 |
- \ |
|
540 |
-static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, \ |
|
541 |
- int dstStride, int srcStride, \ |
|
542 |
- int h) \ |
|
543 |
-{ \ |
|
544 |
- const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ |
|
545 |
- int i; \ |
|
546 |
- \ |
|
547 |
- for (i = 0; i < h; i++) { \ |
|
548 |
- OP(dst[0], (src[0] + src[1]) * 20 - (src[0] + src[2]) * 6 + (src[1] + src[3]) * 3 - (src[2] + src[4])); \ |
|
549 |
- OP(dst[1], (src[1] + src[2]) * 20 - (src[0] + src[3]) * 6 + (src[0] + src[4]) * 3 - (src[1] + src[5])); \ |
|
550 |
- OP(dst[2], (src[2] + src[3]) * 20 - (src[1] + src[4]) * 6 + (src[0] + src[5]) * 3 - (src[0] + src[6])); \ |
|
551 |
- OP(dst[3], (src[3] + src[4]) * 20 - (src[2] + src[5]) * 6 + (src[1] + src[6]) * 3 - (src[0] + src[7])); \ |
|
552 |
- OP(dst[4], (src[4] + src[5]) * 20 - (src[3] + src[6]) * 6 + (src[2] + src[7]) * 3 - (src[1] + src[8])); \ |
|
553 |
- OP(dst[5], (src[5] + src[6]) * 20 - (src[4] + src[7]) * 6 + (src[3] + src[8]) * 3 - (src[2] + src[9])); \ |
|
554 |
- OP(dst[6], (src[6] + src[7]) * 20 - (src[5] + src[8]) * 6 + (src[4] + src[9]) * 3 - (src[3] + src[10])); \ |
|
555 |
- OP(dst[7], (src[7] + src[8]) * 20 - (src[6] + src[9]) * 6 + (src[5] + src[10]) * 3 - (src[4] + src[11])); \ |
|
556 |
- OP(dst[8], (src[8] + src[9]) * 20 - (src[7] + src[10]) * 6 + (src[6] + src[11]) * 3 - (src[5] + src[12])); \ |
|
557 |
- OP(dst[9], (src[9] + src[10]) * 20 - (src[8] + src[11]) * 6 + (src[7] + src[12]) * 3 - (src[6] + src[13])); \ |
|
558 |
- OP(dst[10], (src[10] + src[11]) * 20 - (src[9] + src[12]) * 6 + (src[8] + src[13]) * 3 - (src[7] + src[14])); \ |
|
559 |
- OP(dst[11], (src[11] + src[12]) * 20 - (src[10] + src[13]) * 6 + (src[9] + src[14]) * 3 - (src[8] + src[15])); \ |
|
560 |
- OP(dst[12], (src[12] + src[13]) * 20 - (src[11] + src[14]) * 6 + (src[10] + src[15]) * 3 - (src[9] + src[16])); \ |
|
561 |
- OP(dst[13], (src[13] + src[14]) * 20 - (src[12] + src[15]) * 6 + (src[11] + src[16]) * 3 - (src[10] + src[16])); \ |
|
562 |
- OP(dst[14], (src[14] + src[15]) * 20 - (src[13] + src[16]) * 6 + (src[12] + src[16]) * 3 - (src[11] + src[15])); \ |
|
563 |
- OP(dst[15], (src[15] + src[16]) * 20 - (src[14] + src[16]) * 6 + (src[13] + src[15]) * 3 - (src[12] + src[14])); \ |
|
564 |
- dst += dstStride; \ |
|
565 |
- src += srcStride; \ |
|
566 |
- } \ |
|
567 |
-} \ |
|
568 |
- \ |
|
569 |
-static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, \ |
|
570 |
- int dstStride, int srcStride) \ |
|
571 |
-{ \ |
|
572 |
- const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ |
|
573 |
- const int w = 16; \ |
|
574 |
- int i; \ |
|
575 |
- \ |
|
576 |
- for (i = 0; i < w; i++) { \ |
|
577 |
- const int src0 = src[0 * srcStride]; \ |
|
578 |
- const int src1 = src[1 * srcStride]; \ |
|
579 |
- const int src2 = src[2 * srcStride]; \ |
|
580 |
- const int src3 = src[3 * srcStride]; \ |
|
581 |
- const int src4 = src[4 * srcStride]; \ |
|
582 |
- const int src5 = src[5 * srcStride]; \ |
|
583 |
- const int src6 = src[6 * srcStride]; \ |
|
584 |
- const int src7 = src[7 * srcStride]; \ |
|
585 |
- const int src8 = src[8 * srcStride]; \ |
|
586 |
- const int src9 = src[9 * srcStride]; \ |
|
587 |
- const int src10 = src[10 * srcStride]; \ |
|
588 |
- const int src11 = src[11 * srcStride]; \ |
|
589 |
- const int src12 = src[12 * srcStride]; \ |
|
590 |
- const int src13 = src[13 * srcStride]; \ |
|
591 |
- const int src14 = src[14 * srcStride]; \ |
|
592 |
- const int src15 = src[15 * srcStride]; \ |
|
593 |
- const int src16 = src[16 * srcStride]; \ |
|
594 |
- OP(dst[0 * dstStride], (src0 + src1) * 20 - (src0 + src2) * 6 + (src1 + src3) * 3 - (src2 + src4)); \ |
|
595 |
- OP(dst[1 * dstStride], (src1 + src2) * 20 - (src0 + src3) * 6 + (src0 + src4) * 3 - (src1 + src5)); \ |
|
596 |
- OP(dst[2 * dstStride], (src2 + src3) * 20 - (src1 + src4) * 6 + (src0 + src5) * 3 - (src0 + src6)); \ |
|
597 |
- OP(dst[3 * dstStride], (src3 + src4) * 20 - (src2 + src5) * 6 + (src1 + src6) * 3 - (src0 + src7)); \ |
|
598 |
- OP(dst[4 * dstStride], (src4 + src5) * 20 - (src3 + src6) * 6 + (src2 + src7) * 3 - (src1 + src8)); \ |
|
599 |
- OP(dst[5 * dstStride], (src5 + src6) * 20 - (src4 + src7) * 6 + (src3 + src8) * 3 - (src2 + src9)); \ |
|
600 |
- OP(dst[6 * dstStride], (src6 + src7) * 20 - (src5 + src8) * 6 + (src4 + src9) * 3 - (src3 + src10)); \ |
|
601 |
- OP(dst[7 * dstStride], (src7 + src8) * 20 - (src6 + src9) * 6 + (src5 + src10) * 3 - (src4 + src11)); \ |
|
602 |
- OP(dst[8 * dstStride], (src8 + src9) * 20 - (src7 + src10) * 6 + (src6 + src11) * 3 - (src5 + src12)); \ |
|
603 |
- OP(dst[9 * dstStride], (src9 + src10) * 20 - (src8 + src11) * 6 + (src7 + src12) * 3 - (src6 + src13)); \ |
|
604 |
- OP(dst[10 * dstStride], (src10 + src11) * 20 - (src9 + src12) * 6 + (src8 + src13) * 3 - (src7 + src14)); \ |
|
605 |
- OP(dst[11 * dstStride], (src11 + src12) * 20 - (src10 + src13) * 6 + (src9 + src14) * 3 - (src8 + src15)); \ |
|
606 |
- OP(dst[12 * dstStride], (src12 + src13) * 20 - (src11 + src14) * 6 + (src10 + src15) * 3 - (src9 + src16)); \ |
|
607 |
- OP(dst[13 * dstStride], (src13 + src14) * 20 - (src12 + src15) * 6 + (src11 + src16) * 3 - (src10 + src16)); \ |
|
608 |
- OP(dst[14 * dstStride], (src14 + src15) * 20 - (src13 + src16) * 6 + (src12 + src16) * 3 - (src11 + src15)); \ |
|
609 |
- OP(dst[15 * dstStride], (src15 + src16) * 20 - (src14 + src16) * 6 + (src13 + src15) * 3 - (src12 + src14)); \ |
|
610 |
- dst++; \ |
|
611 |
- src++; \ |
|
612 |
- } \ |
|
613 |
-} \ |
|
614 |
- \ |
|
615 |
-static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, \ |
|
616 |
- ptrdiff_t stride) \ |
|
617 |
-{ \ |
|
618 |
- uint8_t half[64]; \ |
|
619 |
- \ |
|
620 |
- put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8); \ |
|
621 |
- OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8); \ |
|
622 |
-} \ |
|
623 |
- \ |
|
624 |
-static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, \ |
|
625 |
- ptrdiff_t stride) \ |
|
626 |
-{ \ |
|
627 |
- OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8); \ |
|
628 |
-} \ |
|
629 |
- \ |
|
630 |
-static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, \ |
|
631 |
- ptrdiff_t stride) \ |
|
632 |
-{ \ |
|
633 |
- uint8_t half[64]; \ |
|
634 |
- \ |
|
635 |
- put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8); \ |
|
636 |
- OPNAME ## pixels8_l2_8(dst, src + 1, half, stride, stride, 8, 8); \ |
|
637 |
-} \ |
|
638 |
- \ |
|
639 |
-static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, \ |
|
640 |
- ptrdiff_t stride) \ |
|
641 |
-{ \ |
|
642 |
- uint8_t full[16 * 9]; \ |
|
643 |
- uint8_t half[64]; \ |
|
644 |
- \ |
|
645 |
- copy_block9(full, src, 16, stride, 9); \ |
|
646 |
- put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16); \ |
|
647 |
- OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8); \ |
|
648 |
-} \ |
|
649 |
- \ |
|
650 |
-static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, \ |
|
651 |
- ptrdiff_t stride) \ |
|
652 |
-{ \ |
|
653 |
- uint8_t full[16 * 9]; \ |
|
654 |
- \ |
|
655 |
- copy_block9(full, src, 16, stride, 9); \ |
|
656 |
- OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16); \ |
|
657 |
-} \ |
|
658 |
- \ |
|
659 |
-static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, \ |
|
660 |
- ptrdiff_t stride) \ |
|
661 |
-{ \ |
|
662 |
- uint8_t full[16 * 9]; \ |
|
663 |
- uint8_t half[64]; \ |
|
664 |
- \ |
|
665 |
- copy_block9(full, src, 16, stride, 9); \ |
|
666 |
- put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16); \ |
|
667 |
- OPNAME ## pixels8_l2_8(dst, full + 16, half, stride, 16, 8, 8); \ |
|
668 |
-} \ |
|
669 |
- \ |
|
670 |
-void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, \ |
|
671 |
- ptrdiff_t stride) \ |
|
672 |
-{ \ |
|
673 |
- uint8_t full[16 * 9]; \ |
|
674 |
- uint8_t halfH[72]; \ |
|
675 |
- uint8_t halfV[64]; \ |
|
676 |
- uint8_t halfHV[64]; \ |
|
677 |
- \ |
|
678 |
- copy_block9(full, src, 16, stride, 9); \ |
|
679 |
- put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \ |
|
680 |
- put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16); \ |
|
681 |
- put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \ |
|
682 |
- OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, \ |
|
683 |
- stride, 16, 8, 8, 8, 8); \ |
|
684 |
-} \ |
|
685 |
- \ |
|
686 |
-static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, \ |
|
687 |
- ptrdiff_t stride) \ |
|
688 |
-{ \ |
|
689 |
- uint8_t full[16 * 9]; \ |
|
690 |
- uint8_t halfH[72]; \ |
|
691 |
- uint8_t halfHV[64]; \ |
|
692 |
- \ |
|
693 |
- copy_block9(full, src, 16, stride, 9); \ |
|
694 |
- put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \ |
|
695 |
- put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9); \ |
|
696 |
- put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \ |
|
697 |
- OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8); \ |
|
698 |
-} \ |
|
699 |
- \ |
|
700 |
-void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, \ |
|
701 |
- ptrdiff_t stride) \ |
|
702 |
-{ \ |
|
703 |
- uint8_t full[16 * 9]; \ |
|
704 |
- uint8_t halfH[72]; \ |
|
705 |
- uint8_t halfV[64]; \ |
|
706 |
- uint8_t halfHV[64]; \ |
|
707 |
- \ |
|
708 |
- copy_block9(full, src, 16, stride, 9); \ |
|
709 |
- put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \ |
|
710 |
- put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full + 1, 8, 16); \ |
|
711 |
- put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \ |
|
712 |
- OPNAME ## pixels8_l4_8(dst, full + 1, halfH, halfV, halfHV, \ |
|
713 |
- stride, 16, 8, 8, 8, 8); \ |
|
714 |
-} \ |
|
715 |
- \ |
|
716 |
-static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, \ |
|
717 |
- ptrdiff_t stride) \ |
|
718 |
-{ \ |
|
719 |
- uint8_t full[16 * 9]; \ |
|
720 |
- uint8_t halfH[72]; \ |
|
721 |
- uint8_t halfHV[64]; \ |
|
722 |
- \ |
|
723 |
- copy_block9(full, src, 16, stride, 9); \ |
|
724 |
- put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \ |
|
725 |
- put ## RND ## pixels8_l2_8(halfH, halfH, full + 1, 8, 8, 16, 9); \ |
|
726 |
- put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \ |
|
727 |
- OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8); \ |
|
728 |
-} \ |
|
729 |
- \ |
|
730 |
-void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, \ |
|
731 |
- ptrdiff_t stride) \ |
|
732 |
-{ \ |
|
733 |
- uint8_t full[16 * 9]; \ |
|
734 |
- uint8_t halfH[72]; \ |
|
735 |
- uint8_t halfV[64]; \ |
|
736 |
- uint8_t halfHV[64]; \ |
|
737 |
- \ |
|
738 |
- copy_block9(full, src, 16, stride, 9); \ |
|
739 |
- put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \ |
|
740 |
- put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16); \ |
|
741 |
- put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \ |
|
742 |
- OPNAME ## pixels8_l4_8(dst, full + 16, halfH + 8, halfV, halfHV, \ |
|
743 |
- stride, 16, 8, 8, 8, 8); \ |
|
744 |
-} \ |
|
745 |
- \ |
|
746 |
-static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, \ |
|
747 |
- ptrdiff_t stride) \ |
|
748 |
-{ \ |
|
749 |
- uint8_t full[16 * 9]; \ |
|
750 |
- uint8_t halfH[72]; \ |
|
751 |
- uint8_t halfHV[64]; \ |
|
752 |
- \ |
|
753 |
- copy_block9(full, src, 16, stride, 9); \ |
|
754 |
- put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \ |
|
755 |
- put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9); \ |
|
756 |
- put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \ |
|
757 |
- OPNAME ## pixels8_l2_8(dst, halfH + 8, halfHV, stride, 8, 8, 8); \ |
|
758 |
-} \ |
|
759 |
- \ |
|
760 |
-void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, \ |
|
761 |
- ptrdiff_t stride) \ |
|
762 |
-{ \ |
|
763 |
- uint8_t full[16 * 9]; \ |
|
764 |
- uint8_t halfH[72]; \ |
|
765 |
- uint8_t halfV[64]; \ |
|
766 |
- uint8_t halfHV[64]; \ |
|
767 |
- \ |
|
768 |
- copy_block9(full, src, 16, stride, 9); \ |
|
769 |
- put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \ |
|
770 |
- put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full + 1, 8, 16); \ |
|
771 |
- put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \ |
|
772 |
- OPNAME ## pixels8_l4_8(dst, full + 17, halfH + 8, halfV, halfHV, \ |
|
773 |
- stride, 16, 8, 8, 8, 8); \ |
|
774 |
-} \ |
|
775 |
- \ |
|
776 |
-static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, \ |
|
777 |
- ptrdiff_t stride) \ |
|
778 |
-{ \ |
|
779 |
- uint8_t full[16 * 9]; \ |
|
780 |
- uint8_t halfH[72]; \ |
|
781 |
- uint8_t halfHV[64]; \ |
|
782 |
- \ |
|
783 |
- copy_block9(full, src, 16, stride, 9); \ |
|
784 |
- put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \ |
|
785 |
- put ## RND ## pixels8_l2_8(halfH, halfH, full + 1, 8, 8, 16, 9); \ |
|
786 |
- put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \ |
|
787 |
- OPNAME ## pixels8_l2_8(dst, halfH + 8, halfHV, stride, 8, 8, 8); \ |
|
788 |
-} \ |
|
789 |
- \ |
|
790 |
-static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, \ |
|
791 |
- ptrdiff_t stride) \ |
|
792 |
-{ \ |
|
793 |
- uint8_t halfH[72]; \ |
|
794 |
- uint8_t halfHV[64]; \ |
|
795 |
- \ |
|
796 |
- put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9); \ |
|
797 |
- put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \ |
|
798 |
- OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8); \ |
|
799 |
-} \ |
|
800 |
- \ |
|
801 |
-static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, \ |
|
802 |
- ptrdiff_t stride) \ |
|
803 |
-{ \ |
|
804 |
- uint8_t halfH[72]; \ |
|
805 |
- uint8_t halfHV[64]; \ |
|
806 |
- \ |
|
807 |
- put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9); \ |
|
808 |
- put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \ |
|
809 |
- OPNAME ## pixels8_l2_8(dst, halfH + 8, halfHV, stride, 8, 8, 8); \ |
|
810 |
-} \ |
|
811 |
- \ |
|
812 |
-void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, \ |
|
813 |
- ptrdiff_t stride) \ |
|
814 |
-{ \ |
|
815 |
- uint8_t full[16 * 9]; \ |
|
816 |
- uint8_t halfH[72]; \ |
|
817 |
- uint8_t halfV[64]; \ |
|
818 |
- uint8_t halfHV[64]; \ |
|
819 |
- \ |
|
820 |
- copy_block9(full, src, 16, stride, 9); \ |
|
821 |
- put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \ |
|
822 |
- put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16); \ |
|
823 |
- put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \ |
|
824 |
- OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8); \ |
|
825 |
-} \ |
|
826 |
- \ |
|
827 |
-static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, \ |
|
828 |
- ptrdiff_t stride) \ |
|
829 |
-{ \ |
|
830 |
- uint8_t full[16 * 9]; \ |
|
831 |
- uint8_t halfH[72]; \ |
|
832 |
- \ |
|
833 |
- copy_block9(full, src, 16, stride, 9); \ |
|
834 |
- put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \ |
|
835 |
- put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9); \ |
|
836 |
- OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8); \ |
|
837 |
-} \ |
|
838 |
- \ |
|
839 |
-void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, \ |
|
840 |
- ptrdiff_t stride) \ |
|
841 |
-{ \ |
|
842 |
- uint8_t full[16 * 9]; \ |
|
843 |
- uint8_t halfH[72]; \ |
|
844 |
- uint8_t halfV[64]; \ |
|
845 |
- uint8_t halfHV[64]; \ |
|
846 |
- \ |
|
847 |
- copy_block9(full, src, 16, stride, 9); \ |
|
848 |
- put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \ |
|
849 |
- put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full + 1, 8, 16); \ |
|
850 |
- put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \ |
|
851 |
- OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8); \ |
|
852 |
-} \ |
|
853 |
- \ |
|
854 |
-static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, \ |
|
855 |
- ptrdiff_t stride) \ |
|
856 |
-{ \ |
|
857 |
- uint8_t full[16 * 9]; \ |
|
858 |
- uint8_t halfH[72]; \ |
|
859 |
- \ |
|
860 |
- copy_block9(full, src, 16, stride, 9); \ |
|
861 |
- put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \ |
|
862 |
- put ## RND ## pixels8_l2_8(halfH, halfH, full + 1, 8, 8, 16, 9); \ |
|
863 |
- OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8); \ |
|
864 |
-} \ |
|
865 |
- \ |
|
866 |
-static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, \ |
|
867 |
- ptrdiff_t stride) \ |
|
868 |
-{ \ |
|
869 |
- uint8_t halfH[72]; \ |
|
870 |
- \ |
|
871 |
- put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9); \ |
|
872 |
- OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8); \ |
|
873 |
-} \ |
|
874 |
- \ |
|
875 |
-static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, \ |
|
876 |
- ptrdiff_t stride) \ |
|
877 |
-{ \ |
|
878 |
- uint8_t half[256]; \ |
|
879 |
- \ |
|
880 |
- put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16); \ |
|
881 |
- OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16); \ |
|
882 |
-} \ |
|
883 |
- \ |
|
884 |
-static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, \ |
|
885 |
- ptrdiff_t stride) \ |
|
886 |
-{ \ |
|
887 |
- OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16); \ |
|
888 |
-} \ |
|
889 |
- \ |
|
890 |
-static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, \ |
|
891 |
- ptrdiff_t stride) \ |
|
892 |
-{ \ |
|
893 |
- uint8_t half[256]; \ |
|
894 |
- \ |
|
895 |
- put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16); \ |
|
896 |
- OPNAME ## pixels16_l2_8(dst, src + 1, half, stride, stride, 16, 16); \ |
|
897 |
-} \ |
|
898 |
- \ |
|
899 |
-static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, \ |
|
900 |
- ptrdiff_t stride) \ |
|
901 |
-{ \ |
|
902 |
- uint8_t full[24 * 17]; \ |
|
903 |
- uint8_t half[256]; \ |
|
904 |
- \ |
|
905 |
- copy_block17(full, src, 24, stride, 17); \ |
|
906 |
- put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24); \ |
|
907 |
- OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16); \ |
|
908 |
-} \ |
|
909 |
- \ |
|
910 |
-static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, \ |
|
911 |
- ptrdiff_t stride) \ |
|
912 |
-{ \ |
|
913 |
- uint8_t full[24 * 17]; \ |
|
914 |
- \ |
|
915 |
- copy_block17(full, src, 24, stride, 17); \ |
|
916 |
- OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24); \ |
|
917 |
-} \ |
|
918 |
- \ |
|
919 |
-static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, \ |
|
920 |
- ptrdiff_t stride) \ |
|
921 |
-{ \ |
|
922 |
- uint8_t full[24 * 17]; \ |
|
923 |
- uint8_t half[256]; \ |
|
924 |
- \ |
|
925 |
- copy_block17(full, src, 24, stride, 17); \ |
|
926 |
- put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24); \ |
|
927 |
- OPNAME ## pixels16_l2_8(dst, full + 24, half, stride, 24, 16, 16); \ |
|
928 |
-} \ |
|
929 |
- \ |
|
930 |
-void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, \ |
|
931 |
- ptrdiff_t stride) \ |
|
932 |
-{ \ |
|
933 |
- uint8_t full[24 * 17]; \ |
|
934 |
- uint8_t halfH[272]; \ |
|
935 |
- uint8_t halfV[256]; \ |
|
936 |
- uint8_t halfHV[256]; \ |
|
937 |
- \ |
|
938 |
- copy_block17(full, src, 24, stride, 17); \ |
|
939 |
- put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \ |
|
940 |
- put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24); \ |
|
941 |
- put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \ |
|
942 |
- OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, \ |
|
943 |
- stride, 24, 16, 16, 16, 16); \ |
|
944 |
-} \ |
|
945 |
- \ |
|
946 |
-static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, \ |
|
947 |
- ptrdiff_t stride) \ |
|
948 |
-{ \ |
|
949 |
- uint8_t full[24 * 17]; \ |
|
950 |
- uint8_t halfH[272]; \ |
|
951 |
- uint8_t halfHV[256]; \ |
|
952 |
- \ |
|
953 |
- copy_block17(full, src, 24, stride, 17); \ |
|
954 |
- put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \ |
|
955 |
- put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17); \ |
|
956 |
- put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \ |
|
957 |
- OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16); \ |
|
958 |
-} \ |
|
959 |
- \ |
|
960 |
-void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, \ |
|
961 |
- ptrdiff_t stride) \ |
|
962 |
-{ \ |
|
963 |
- uint8_t full[24 * 17]; \ |
|
964 |
- uint8_t halfH[272]; \ |
|
965 |
- uint8_t halfV[256]; \ |
|
966 |
- uint8_t halfHV[256]; \ |
|
967 |
- \ |
|
968 |
- copy_block17(full, src, 24, stride, 17); \ |
|
969 |
- put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \ |
|
970 |
- put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full + 1, 16, 24); \ |
|
971 |
- put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \ |
|
972 |
- OPNAME ## pixels16_l4_8(dst, full + 1, halfH, halfV, halfHV, \ |
|
973 |
- stride, 24, 16, 16, 16, 16); \ |
|
974 |
-} \ |
|
975 |
- \ |
|
976 |
-static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, \ |
|
977 |
- ptrdiff_t stride) \ |
|
978 |
-{ \ |
|
979 |
- uint8_t full[24 * 17]; \ |
|
980 |
- uint8_t halfH[272]; \ |
|
981 |
- uint8_t halfHV[256]; \ |
|
982 |
- \ |
|
983 |
- copy_block17(full, src, 24, stride, 17); \ |
|
984 |
- put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \ |
|
985 |
- put ## RND ## pixels16_l2_8(halfH, halfH, full + 1, 16, 16, 24, 17); \ |
|
986 |
- put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \ |
|
987 |
- OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16); \ |
|
988 |
-} \ |
|
989 |
- \ |
|
990 |
-void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, \ |
|
991 |
- ptrdiff_t stride) \ |
|
992 |
-{ \ |
|
993 |
- uint8_t full[24 * 17]; \ |
|
994 |
- uint8_t halfH[272]; \ |
|
995 |
- uint8_t halfV[256]; \ |
|
996 |
- uint8_t halfHV[256]; \ |
|
997 |
- \ |
|
998 |
- copy_block17(full, src, 24, stride, 17); \ |
|
999 |
- put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \ |
|
1000 |
- put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24); \ |
|
1001 |
- put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \ |
|
1002 |
- OPNAME ## pixels16_l4_8(dst, full + 24, halfH + 16, halfV, halfHV, \ |
|
1003 |
- stride, 24, 16, 16, 16, 16); \ |
|
1004 |
-} \ |
|
1005 |
- \ |
|
1006 |
-static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, \ |
|
1007 |
- ptrdiff_t stride) \ |
|
1008 |
-{ \ |
|
1009 |
- uint8_t full[24 * 17]; \ |
|
1010 |
- uint8_t halfH[272]; \ |
|
1011 |
- uint8_t halfHV[256]; \ |
|
1012 |
- \ |
|
1013 |
- copy_block17(full, src, 24, stride, 17); \ |
|
1014 |
- put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \ |
|
1015 |
- put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17); \ |
|
1016 |
- put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \ |
|
1017 |
- OPNAME ## pixels16_l2_8(dst, halfH + 16, halfHV, stride, 16, 16, 16); \ |
|
1018 |
-} \ |
|
1019 |
- \ |
|
1020 |
-void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, \ |
|
1021 |
- ptrdiff_t stride) \ |
|
1022 |
-{ \ |
|
1023 |
- uint8_t full[24 * 17]; \ |
|
1024 |
- uint8_t halfH[272]; \ |
|
1025 |
- uint8_t halfV[256]; \ |
|
1026 |
- uint8_t halfHV[256]; \ |
|
1027 |
- \ |
|
1028 |
- copy_block17(full, src, 24, stride, 17); \ |
|
1029 |
- put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \ |
|
1030 |
- put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full + 1, 16, 24); \ |
|
1031 |
- put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \ |
|
1032 |
- OPNAME ## pixels16_l4_8(dst, full + 25, halfH + 16, halfV, halfHV, \ |
|
1033 |
- stride, 24, 16, 16, 16, 16); \ |
|
1034 |
-} \ |
|
1035 |
- \ |
|
1036 |
-static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, \ |
|
1037 |
- ptrdiff_t stride) \ |
|
1038 |
-{ \ |
|
1039 |
- uint8_t full[24 * 17]; \ |
|
1040 |
- uint8_t halfH[272]; \ |
|
1041 |
- uint8_t halfHV[256]; \ |
|
1042 |
- \ |
|
1043 |
- copy_block17(full, src, 24, stride, 17); \ |
|
1044 |
- put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \ |
|
1045 |
- put ## RND ## pixels16_l2_8(halfH, halfH, full + 1, 16, 16, 24, 17); \ |
|
1046 |
- put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \ |
|
1047 |
- OPNAME ## pixels16_l2_8(dst, halfH + 16, halfHV, stride, 16, 16, 16); \ |
|
1048 |
-} \ |
|
1049 |
- \ |
|
1050 |
-static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, \ |
|
1051 |
- ptrdiff_t stride) \ |
|
1052 |
-{ \ |
|
1053 |
- uint8_t halfH[272]; \ |
|
1054 |
- uint8_t halfHV[256]; \ |
|
1055 |
- \ |
|
1056 |
- put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17); \ |
|
1057 |
- put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \ |
|
1058 |
- OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16); \ |
|
1059 |
-} \ |
|
1060 |
- \ |
|
1061 |
-static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, \ |
|
1062 |
- ptrdiff_t stride) \ |
|
1063 |
-{ \ |
|
1064 |
- uint8_t halfH[272]; \ |
|
1065 |
- uint8_t halfHV[256]; \ |
|
1066 |
- \ |
|
1067 |
- put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17); \ |
|
1068 |
- put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \ |
|
1069 |
- OPNAME ## pixels16_l2_8(dst, halfH + 16, halfHV, stride, 16, 16, 16); \ |
|
1070 |
-} \ |
|
1071 |
- \ |
|
1072 |
-void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, \ |
|
1073 |
- ptrdiff_t stride) \ |
|
1074 |
-{ \ |
|
1075 |
- uint8_t full[24 * 17]; \ |
|
1076 |
- uint8_t halfH[272]; \ |
|
1077 |
- uint8_t halfV[256]; \ |
|
1078 |
- uint8_t halfHV[256]; \ |
|
1079 |
- \ |
|
1080 |
- copy_block17(full, src, 24, stride, 17); \ |
|
1081 |
- put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \ |
|
1082 |
- put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24); \ |
|
1083 |
- put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \ |
|
1084 |
- OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16); \ |
|
1085 |
-} \ |
|
1086 |
- \ |
|
1087 |
-static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, \ |
|
1088 |
- ptrdiff_t stride) \ |
|
1089 |
-{ \ |
|
1090 |
- uint8_t full[24 * 17]; \ |
|
1091 |
- uint8_t halfH[272]; \ |
|
1092 |
- \ |
|
1093 |
- copy_block17(full, src, 24, stride, 17); \ |
|
1094 |
- put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \ |
|
1095 |
- put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17); \ |
|
1096 |
- OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16); \ |
|
1097 |
-} \ |
|
1098 |
- \ |
|
1099 |
-void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, \ |
|
1100 |
- ptrdiff_t stride) \ |
|
1101 |
-{ \ |
|
1102 |
- uint8_t full[24 * 17]; \ |
|
1103 |
- uint8_t halfH[272]; \ |
|
1104 |
- uint8_t halfV[256]; \ |
|
1105 |
- uint8_t halfHV[256]; \ |
|
1106 |
- \ |
|
1107 |
- copy_block17(full, src, 24, stride, 17); \ |
|
1108 |
- put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \ |
|
1109 |
- put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full + 1, 16, 24); \ |
|
1110 |
- put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \ |
|
1111 |
- OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16); \ |
|
1112 |
-} \ |
|
1113 |
- \ |
|
1114 |
-static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, \ |
|
1115 |
- ptrdiff_t stride) \ |
|
1116 |
-{ \ |
|
1117 |
- uint8_t full[24 * 17]; \ |
|
1118 |
- uint8_t halfH[272]; \ |
|
1119 |
- \ |
|
1120 |
- copy_block17(full, src, 24, stride, 17); \ |
|
1121 |
- put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \ |
|
1122 |
- put ## RND ## pixels16_l2_8(halfH, halfH, full + 1, 16, 16, 24, 17); \ |
|
1123 |
- OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16); \ |
|
1124 |
-} \ |
|
1125 |
- \ |
|
1126 |
-static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, \ |
|
1127 |
- ptrdiff_t stride) \ |
|
1128 |
-{ \ |
|
1129 |
- uint8_t halfH[272]; \ |
|
1130 |
- \ |
|
1131 |
- put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17); \ |
|
1132 |
- OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16); \ |
|
1133 |
-} |
|
1134 |
- |
|
1135 |
-#define op_avg(a, b) a = (((a) + cm[((b) + 16) >> 5] + 1) >> 1) |
|
1136 |
-#define op_avg_no_rnd(a, b) a = (((a) + cm[((b) + 15) >> 5]) >> 1) |
|
1137 |
-#define op_put(a, b) a = cm[((b) + 16) >> 5] |
|
1138 |
-#define op_put_no_rnd(a, b) a = cm[((b) + 15) >> 5] |
|
1139 |
- |
|
1140 |
-QPEL_MC(0, put_, _, op_put) |
|
1141 |
-QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd) |
|
1142 |
-QPEL_MC(0, avg_, _, op_avg) |
|
1143 |
- |
|
1144 |
-#undef op_avg |
|
1145 |
-#undef op_put |
|
1146 |
-#undef op_put_no_rnd |
|
1147 |
- |
|
1148 |
-void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride) |
|
1149 |
-{ |
|
1150 |
- put_pixels8_8_c(dst, src, stride, 8); |
|
1151 |
-} |
|
1152 |
- |
|
1153 |
-void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride) |
|
1154 |
-{ |
|
1155 |
- avg_pixels8_8_c(dst, src, stride, 8); |
|
1156 |
-} |
|
1157 |
- |
|
1158 |
-void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride) |
|
1159 |
-{ |
|
1160 |
- put_pixels16_8_c(dst, src, stride, 16); |
|
1161 |
-} |
|
1162 |
- |
|
1163 |
-void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride) |
|
1164 |
-{ |
|
1165 |
- avg_pixels16_8_c(dst, src, stride, 16); |
|
1166 |
-} |
|
1167 |
- |
|
1168 |
-#define put_qpel8_mc00_c ff_put_pixels8x8_c |
|
1169 |
-#define avg_qpel8_mc00_c ff_avg_pixels8x8_c |
|
1170 |
-#define put_qpel16_mc00_c ff_put_pixels16x16_c |
|
1171 |
-#define avg_qpel16_mc00_c ff_avg_pixels16x16_c |
|
1172 |
-#define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c |
|
1173 |
-#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c |
|
1174 |
- |
|
1175 |
-void ff_put_pixels8_l2_8(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, |
|
1176 |
- int dst_stride, int src_stride1, int src_stride2, |
|
1177 |
- int h) |
|
1178 |
-{ |
|
1179 |
- put_pixels8_l2_8(dst, src1, src2, dst_stride, src_stride1, src_stride2, h); |
|
1180 |
- |
|
1181 |
-} |
|
1182 |
- |
|
1183 | 488 |
static inline int pix_abs16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, |
1184 | 489 |
int line_size, int h) |
1185 | 490 |
{ |
... | ... |
@@ -2198,35 +1500,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) |
2198 | 2198 |
c->pix_abs[1][2] = pix_abs8_y2_c; |
2199 | 2199 |
c->pix_abs[1][3] = pix_abs8_xy2_c; |
2200 | 2200 |
|
2201 |
-#define dspfunc(PFX, IDX, NUM) \ |
|
2202 |
- c->PFX ## _pixels_tab[IDX][0] = PFX ## NUM ## _mc00_c; \ |
|
2203 |
- c->PFX ## _pixels_tab[IDX][1] = PFX ## NUM ## _mc10_c; \ |
|
2204 |
- c->PFX ## _pixels_tab[IDX][2] = PFX ## NUM ## _mc20_c; \ |
|
2205 |
- c->PFX ## _pixels_tab[IDX][3] = PFX ## NUM ## _mc30_c; \ |
|
2206 |
- c->PFX ## _pixels_tab[IDX][4] = PFX ## NUM ## _mc01_c; \ |
|
2207 |
- c->PFX ## _pixels_tab[IDX][5] = PFX ## NUM ## _mc11_c; \ |
|
2208 |
- c->PFX ## _pixels_tab[IDX][6] = PFX ## NUM ## _mc21_c; \ |
|
2209 |
- c->PFX ## _pixels_tab[IDX][7] = PFX ## NUM ## _mc31_c; \ |
|
2210 |
- c->PFX ## _pixels_tab[IDX][8] = PFX ## NUM ## _mc02_c; \ |
|
2211 |
- c->PFX ## _pixels_tab[IDX][9] = PFX ## NUM ## _mc12_c; \ |
|
2212 |
- c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \ |
|
2213 |
- c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \ |
|
2214 |
- c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \ |
|
2215 |
- c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \ |
|
2216 |
- c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \ |
|
2217 |
- c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c |
|
2218 |
- |
|
2219 |
- dspfunc(put_qpel, 0, 16); |
|
2220 |
- dspfunc(put_qpel, 1, 8); |
|
2221 |
- |
|
2222 |
- dspfunc(put_no_rnd_qpel, 0, 16); |
|
2223 |
- dspfunc(put_no_rnd_qpel, 1, 8); |
|
2224 |
- |
|
2225 |
- dspfunc(avg_qpel, 0, 16); |
|
2226 |
- dspfunc(avg_qpel, 1, 8); |
|
2227 |
- |
|
2228 |
-#undef dspfunc |
|
2229 |
- |
|
2230 | 2201 |
#define SET_CMP_FUNC(name) \ |
2231 | 2202 |
c->name[0] = name ## 16_c; \ |
2232 | 2203 |
c->name[1] = name ## 8x8_c; |
... | ... |
@@ -34,15 +34,6 @@ |
34 | 34 |
|
35 | 35 |
extern uint32_t ff_square_tab[512]; |
36 | 36 |
|
37 |
-void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride); |
|
38 |
-void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride); |
|
39 |
-void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride); |
|
40 |
-void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride); |
|
41 |
- |
|
42 |
-void ff_put_pixels8_l2_8(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, |
|
43 |
- int dst_stride, int src_stride1, int src_stride2, |
|
44 |
- int h); |
|
45 |
- |
|
46 | 37 |
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, |
47 | 38 |
int dxx, int dxy, int dyx, int dyy, int shift, int r, |
48 | 39 |
int width, int height); |
... | ... |
@@ -64,33 +55,9 @@ void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, |
64 | 64 |
* Block sizes for op_pixels_func are 8x4,8x8 16x8 16x16. |
65 | 65 |
* h for op_pixels_func is limited to { width / 2, width }, |
66 | 66 |
* but never larger than 16 and never smaller than 4. */ |
67 |
-typedef void (*qpel_mc_func)(uint8_t *dst /* align width (8 or 16) */, |
|
68 |
- uint8_t *src /* align 1 */, ptrdiff_t stride); |
|
69 |
- |
|
70 | 67 |
typedef void (*op_fill_func)(uint8_t *block /* align width (8 or 16) */, |
71 | 68 |
uint8_t value, int line_size, int h); |
72 | 69 |
|
73 |
-#define DEF_OLD_QPEL(name) \ |
|
74 |
- void ff_put_ ## name(uint8_t *dst /* align width (8 or 16) */, \ |
|
75 |
- uint8_t *src /* align 1 */, ptrdiff_t stride); \ |
|
76 |
- void ff_put_no_rnd_ ## name(uint8_t *dst /* align width (8 or 16) */, \ |
|
77 |
- uint8_t *src /* align 1 */, ptrdiff_t stride); \ |
|
78 |
- void ff_avg_ ## name(uint8_t *dst /* align width (8 or 16) */, \ |
|
79 |
- uint8_t *src /* align 1 */, ptrdiff_t stride); |
|
80 |
- |
|
81 |
-DEF_OLD_QPEL(qpel16_mc11_old_c) |
|
82 |
-DEF_OLD_QPEL(qpel16_mc31_old_c) |
|
83 |
-DEF_OLD_QPEL(qpel16_mc12_old_c) |
|
84 |
-DEF_OLD_QPEL(qpel16_mc32_old_c) |
|
85 |
-DEF_OLD_QPEL(qpel16_mc13_old_c) |
|
86 |
-DEF_OLD_QPEL(qpel16_mc33_old_c) |
|
87 |
-DEF_OLD_QPEL(qpel8_mc11_old_c) |
|
88 |
-DEF_OLD_QPEL(qpel8_mc31_old_c) |
|
89 |
-DEF_OLD_QPEL(qpel8_mc12_old_c) |
|
90 |
-DEF_OLD_QPEL(qpel8_mc32_old_c) |
|
91 |
-DEF_OLD_QPEL(qpel8_mc13_old_c) |
|
92 |
-DEF_OLD_QPEL(qpel8_mc33_old_c) |
|
93 |
- |
|
94 | 70 |
struct MpegEncContext; |
95 | 71 |
/* Motion estimation: |
96 | 72 |
* h is limited to { width / 2, width, 2 * width }, |
... | ... |
@@ -174,10 +141,6 @@ typedef struct DSPContext { |
174 | 174 |
me_cmp_func ildct_cmp[6]; // only width 16 used |
175 | 175 |
me_cmp_func frame_skip_cmp[6]; // only width 8 used |
176 | 176 |
|
177 |
- qpel_mc_func put_qpel_pixels_tab[2][16]; |
|
178 |
- qpel_mc_func avg_qpel_pixels_tab[2][16]; |
|
179 |
- qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16]; |
|
180 |
- |
|
181 | 177 |
me_cmp_func pix_abs[2][4]; |
182 | 178 |
|
183 | 179 |
void (*bswap_buf)(uint32_t *dst, const uint32_t *src, int w); |
184 | 180 |
deleted file mode 100644 |
... | ... |
@@ -1,223 +0,0 @@ |
1 |
-/* |
|
2 |
- * DSP utils |
|
3 |
- * Copyright (c) 2000, 2001 Fabrice Bellard |
|
4 |
- * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> |
|
5 |
- * |
|
6 |
- * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at> |
|
7 |
- * |
|
8 |
- * This file is part of Libav. |
|
9 |
- * |
|
10 |
- * Libav is free software; you can redistribute it and/or |
|
11 |
- * modify it under the terms of the GNU Lesser General Public |
|
12 |
- * License as published by the Free Software Foundation; either |
|
13 |
- * version 2.1 of the License, or (at your option) any later version. |
|
14 |
- * |
|
15 |
- * Libav is distributed in the hope that it will be useful, |
|
16 |
- * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
17 |
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
18 |
- * Lesser General Public License for more details. |
|
19 |
- * |
|
20 |
- * You should have received a copy of the GNU Lesser General Public |
|
21 |
- * License along with Libav; if not, write to the Free Software |
|
22 |
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
23 |
- */ |
|
24 |
- |
|
25 |
-/** |
|
26 |
- * @file |
|
27 |
- * DSP utils |
|
28 |
- */ |
|
29 |
- |
|
30 |
-#define PIXOP2(OPNAME, OP) \ |
|
31 |
-static inline void OPNAME ## _no_rnd_pixels8_l2_8(uint8_t *dst, \ |
|
32 |
- const uint8_t *src1, \ |
|
33 |
- const uint8_t *src2, \ |
|
34 |
- int dst_stride, \ |
|
35 |
- int src_stride1, \ |
|
36 |
- int src_stride2, \ |
|
37 |
- int h) \ |
|
38 |
-{ \ |
|
39 |
- int i; \ |
|
40 |
- \ |
|
41 |
- for (i = 0; i < h; i++) { \ |
|
42 |
- uint32_t a, b; \ |
|
43 |
- a = AV_RN32(&src1[i * src_stride1]); \ |
|
44 |
- b = AV_RN32(&src2[i * src_stride2]); \ |
|
45 |
- OP(*((uint32_t *) &dst[i * dst_stride]), \ |
|
46 |
- no_rnd_avg32(a, b)); \ |
|
47 |
- a = AV_RN32(&src1[i * src_stride1 + 4]); \ |
|
48 |
- b = AV_RN32(&src2[i * src_stride2 + 4]); \ |
|
49 |
- OP(*((uint32_t *) &dst[i * dst_stride + 4]), \ |
|
50 |
- no_rnd_avg32(a, b)); \ |
|
51 |
- } \ |
|
52 |
-} \ |
|
53 |
- \ |
|
54 |
-static inline void OPNAME ## _no_rnd_pixels16_l2_8(uint8_t *dst, \ |
|
55 |
- const uint8_t *src1, \ |
|
56 |
- const uint8_t *src2, \ |
|
57 |
- int dst_stride, \ |
|
58 |
- int src_stride1, \ |
|
59 |
- int src_stride2, \ |
|
60 |
- int h) \ |
|
61 |
-{ \ |
|
62 |
- OPNAME ## _no_rnd_pixels8_l2_8(dst, src1, src2, dst_stride, \ |
|
63 |
- src_stride1, src_stride2, h); \ |
|
64 |
- OPNAME ## _no_rnd_pixels8_l2_8(dst + 8, \ |
|
65 |
- src1 + 8, \ |
|
66 |
- src2 + 8, \ |
|
67 |
- dst_stride, src_stride1, \ |
|
68 |
- src_stride2, h); \ |
|
69 |
-} \ |
|
70 |
- \ |
|
71 |
-static inline void OPNAME ## _pixels8_l4_8(uint8_t *dst, \ |
|
72 |
- const uint8_t *src1, \ |
|
73 |
- const uint8_t *src2, \ |
|
74 |
- const uint8_t *src3, \ |
|
75 |
- const uint8_t *src4, \ |
|
76 |
- int dst_stride, \ |
|
77 |
- int src_stride1, \ |
|
78 |
- int src_stride2, \ |
|
79 |
- int src_stride3, \ |
|
80 |
- int src_stride4, \ |
|
81 |
- int h) \ |
|
82 |
-{ \ |
|
83 |
- /* FIXME HIGH BIT DEPTH */ \ |
|
84 |
- int i; \ |
|
85 |
- \ |
|
86 |
- for (i = 0; i < h; i++) { \ |
|
87 |
- uint32_t a, b, c, d, l0, l1, h0, h1; \ |
|
88 |
- a = AV_RN32(&src1[i * src_stride1]); \ |
|
89 |
- b = AV_RN32(&src2[i * src_stride2]); \ |
|
90 |
- c = AV_RN32(&src3[i * src_stride3]); \ |
|
91 |
- d = AV_RN32(&src4[i * src_stride4]); \ |
|
92 |
- l0 = (a & 0x03030303UL) + \ |
|
93 |
- (b & 0x03030303UL) + \ |
|
94 |
- 0x02020202UL; \ |
|
95 |
- h0 = ((a & 0xFCFCFCFCUL) >> 2) + \ |
|
96 |
- ((b & 0xFCFCFCFCUL) >> 2); \ |
|
97 |
- l1 = (c & 0x03030303UL) + \ |
|
98 |
- (d & 0x03030303UL); \ |
|
99 |
- h1 = ((c & 0xFCFCFCFCUL) >> 2) + \ |
|
100 |
- ((d & 0xFCFCFCFCUL) >> 2); \ |
|
101 |
- OP(*((uint32_t *) &dst[i * dst_stride]), \ |
|
102 |
- h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \ |
|
103 |
- a = AV_RN32(&src1[i * src_stride1 + 4]); \ |
|
104 |
- b = AV_RN32(&src2[i * src_stride2 + 4]); \ |
|
105 |
- c = AV_RN32(&src3[i * src_stride3 + 4]); \ |
|
106 |
- d = AV_RN32(&src4[i * src_stride4 + 4]); \ |
|
107 |
- l0 = (a & 0x03030303UL) + \ |
|
108 |
- (b & 0x03030303UL) + \ |
|
109 |
- 0x02020202UL; \ |
|
110 |
- h0 = ((a & 0xFCFCFCFCUL) >> 2) + \ |
|
111 |
- ((b & 0xFCFCFCFCUL) >> 2); \ |
|
112 |
- l1 = (c & 0x03030303UL) + \ |
|
113 |
- (d & 0x03030303UL); \ |
|
114 |
- h1 = ((c & 0xFCFCFCFCUL) >> 2) + \ |
|
115 |
- ((d & 0xFCFCFCFCUL) >> 2); \ |
|
116 |
- OP(*((uint32_t *) &dst[i * dst_stride + 4]), \ |
|
117 |
- h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \ |
|
118 |
- } \ |
|
119 |
-} \ |
|
120 |
- \ |
|
121 |
-static inline void OPNAME ## _no_rnd_pixels8_l4_8(uint8_t *dst, \ |
|
122 |
- const uint8_t *src1, \ |
|
123 |
- const uint8_t *src2, \ |
|
124 |
- const uint8_t *src3, \ |
|
125 |
- const uint8_t *src4, \ |
|
126 |
- int dst_stride, \ |
|
127 |
- int src_stride1, \ |
|
128 |
- int src_stride2, \ |
|
129 |
- int src_stride3, \ |
|
130 |
- int src_stride4, \ |
|
131 |
- int h) \ |
|
132 |
-{ \ |
|
133 |
- /* FIXME HIGH BIT DEPTH */ \ |
|
134 |
- int i; \ |
|
135 |
- \ |
|
136 |
- for (i = 0; i < h; i++) { \ |
|
137 |
- uint32_t a, b, c, d, l0, l1, h0, h1; \ |
|
138 |
- a = AV_RN32(&src1[i * src_stride1]); \ |
|
139 |
- b = AV_RN32(&src2[i * src_stride2]); \ |
|
140 |
- c = AV_RN32(&src3[i * src_stride3]); \ |
|
141 |
- d = AV_RN32(&src4[i * src_stride4]); \ |
|
142 |
- l0 = (a & 0x03030303UL) + \ |
|
143 |
- (b & 0x03030303UL) + \ |
|
144 |
- 0x01010101UL; \ |
|
145 |
- h0 = ((a & 0xFCFCFCFCUL) >> 2) + \ |
|
146 |
- ((b & 0xFCFCFCFCUL) >> 2); \ |
|
147 |
- l1 = (c & 0x03030303UL) + \ |
|
148 |
- (d & 0x03030303UL); \ |
|
149 |
- h1 = ((c & 0xFCFCFCFCUL) >> 2) + \ |
|
150 |
- ((d & 0xFCFCFCFCUL) >> 2); \ |
|
151 |
- OP(*((uint32_t *) &dst[i * dst_stride]), \ |
|
152 |
- h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \ |
|
153 |
- a = AV_RN32(&src1[i * src_stride1 + 4]); \ |
|
154 |
- b = AV_RN32(&src2[i * src_stride2 + 4]); \ |
|
155 |
- c = AV_RN32(&src3[i * src_stride3 + 4]); \ |
|
156 |
- d = AV_RN32(&src4[i * src_stride4 + 4]); \ |
|
157 |
- l0 = (a & 0x03030303UL) + \ |
|
158 |
- (b & 0x03030303UL) + \ |
|
159 |
- 0x01010101UL; \ |
|
160 |
- h0 = ((a & 0xFCFCFCFCUL) >> 2) + \ |
|
161 |
- ((b & 0xFCFCFCFCUL) >> 2); \ |
|
162 |
- l1 = (c & 0x03030303UL) + \ |
|
163 |
- (d & 0x03030303UL); \ |
|
164 |
- h1 = ((c & 0xFCFCFCFCUL) >> 2) + \ |
|
165 |
- ((d & 0xFCFCFCFCUL) >> 2); \ |
|
166 |
- OP(*((uint32_t *) &dst[i * dst_stride + 4]), \ |
|
167 |
- h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \ |
|
168 |
- } \ |
|
169 |
-} \ |
|
170 |
- \ |
|
171 |
-static inline void OPNAME ## _pixels16_l4_8(uint8_t *dst, \ |
|
172 |
- const uint8_t *src1, \ |
|
173 |
- const uint8_t *src2, \ |
|
174 |
- const uint8_t *src3, \ |
|
175 |
- const uint8_t *src4, \ |
|
176 |
- int dst_stride, \ |
|
177 |
- int src_stride1, \ |
|
178 |
- int src_stride2, \ |
|
179 |
- int src_stride3, \ |
|
180 |
- int src_stride4, \ |
|
181 |
- int h) \ |
|
182 |
-{ \ |
|
183 |
- OPNAME ## _pixels8_l4_8(dst, src1, src2, src3, src4, dst_stride, \ |
|
184 |
- src_stride1, src_stride2, src_stride3, \ |
|
185 |
- src_stride4, h); \ |
|
186 |
- OPNAME ## _pixels8_l4_8(dst + 8, \ |
|
187 |
- src1 + 8, src2 + 8, \ |
|
188 |
- src3 + 8, src4 + 8, \ |
|
189 |
- dst_stride, src_stride1, src_stride2, \ |
|
190 |
- src_stride3, src_stride4, h); \ |
|
191 |
-} \ |
|
192 |
- \ |
|
193 |
-static inline void OPNAME ## _no_rnd_pixels16_l4_8(uint8_t *dst, \ |
|
194 |
- const uint8_t *src1, \ |
|
195 |
- const uint8_t *src2, \ |
|
196 |
- const uint8_t *src3, \ |
|
197 |
- const uint8_t *src4, \ |
|
198 |
- int dst_stride, \ |
|
199 |
- int src_stride1, \ |
|
200 |
- int src_stride2, \ |
|
201 |
- int src_stride3, \ |
|
202 |
- int src_stride4, \ |
|
203 |
- int h) \ |
|
204 |
-{ \ |
|
205 |
- OPNAME ## _no_rnd_pixels8_l4_8(dst, src1, src2, src3, src4, \ |
|
206 |
- dst_stride, src_stride1, \ |
|
207 |
- src_stride2, src_stride3, \ |
|
208 |
- src_stride4, h); \ |
|
209 |
- OPNAME ## _no_rnd_pixels8_l4_8(dst + 8, \ |
|
210 |
- src1 + 8, src2 + 8, \ |
|
211 |
- src3 + 8, src4 + 8, \ |
|
212 |
- dst_stride, src_stride1, \ |
|
213 |
- src_stride2, src_stride3, \ |
|
214 |
- src_stride4, h); \ |
|
215 |
-} \ |
|
216 |
- |
|
217 |
-#define op_avg(a, b) a = rnd_avg32(a, b) |
|
218 |
-#define op_put(a, b) a = b |
|
219 |
-#define put_no_rnd_pixels8_8_c put_pixels8_8_c |
|
220 |
-PIXOP2(avg, op_avg) |
|
221 |
-PIXOP2(put, op_put) |
|
222 |
-#undef op_avg |
|
223 |
-#undef op_put |
... | ... |
@@ -36,6 +36,7 @@ |
36 | 36 |
#include "mpeg4video_parser.h" |
37 | 37 |
#include "mpegvideo.h" |
38 | 38 |
#include "msmpeg4.h" |
39 |
+#include "qpeldsp.h" |
|
39 | 40 |
#include "thread.h" |
40 | 41 |
|
41 | 42 |
av_cold int ff_h263_decode_init(AVCodecContext *avctx) |
... | ... |
@@ -116,6 +117,7 @@ av_cold int ff_h263_decode_init(AVCodecContext *avctx) |
116 | 116 |
return ret; |
117 | 117 |
|
118 | 118 |
ff_h263dsp_init(&s->h263dsp); |
119 |
+ ff_qpeldsp_init(&s->qdsp); |
|
119 | 120 |
ff_h263_decode_init_vlc(); |
120 | 121 |
|
121 | 122 |
return 0; |
... | ... |
@@ -461,9 +463,9 @@ int ff_h263_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, |
461 | 461 |
avctx->has_b_frames = !s->low_delay; |
462 | 462 |
|
463 | 463 |
#define SET_QPEL_FUNC(postfix1, postfix2) \ |
464 |
- s->dsp.put_ ## postfix1 = ff_put_ ## postfix2; \ |
|
465 |
- s->dsp.put_no_rnd_ ## postfix1 = ff_put_no_rnd_ ## postfix2; \ |
|
466 |
- s->dsp.avg_ ## postfix1 = ff_avg_ ## postfix2; |
|
464 |
+ s->qdsp.put_ ## postfix1 = ff_put_ ## postfix2; \ |
|
465 |
+ s->qdsp.put_no_rnd_ ## postfix1 = ff_put_no_rnd_ ## postfix2; \ |
|
466 |
+ s->qdsp.avg_ ## postfix1 = ff_avg_ ## postfix2; |
|
467 | 467 |
|
468 | 468 |
if (s->workaround_bugs & FF_BUG_STD_QPEL) { |
469 | 469 |
SET_QPEL_FUNC(qpel_pixels_tab[0][5], qpel16_mc11_old_c) |
... | ... |
@@ -527,11 +529,11 @@ int ff_h263_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, |
527 | 527 |
} |
528 | 528 |
|
529 | 529 |
if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) { |
530 |
- s->me.qpel_put = s->dsp.put_qpel_pixels_tab; |
|
531 |
- s->me.qpel_avg = s->dsp.avg_qpel_pixels_tab; |
|
530 |
+ s->me.qpel_put = s->qdsp.put_qpel_pixels_tab; |
|
531 |
+ s->me.qpel_avg = s->qdsp.avg_qpel_pixels_tab; |
|
532 | 532 |
} else { |
533 |
- s->me.qpel_put = s->dsp.put_no_rnd_qpel_pixels_tab; |
|
534 |
- s->me.qpel_avg = s->dsp.avg_qpel_pixels_tab; |
|
533 |
+ s->me.qpel_put = s->qdsp.put_no_rnd_qpel_pixels_tab; |
|
534 |
+ s->me.qpel_avg = s->qdsp.avg_qpel_pixels_tab; |
|
535 | 535 |
} |
536 | 536 |
|
537 | 537 |
if ((ret = ff_MPV_frame_start(s, avctx)) < 0) |
... | ... |
@@ -329,9 +329,11 @@ int ff_init_me(MpegEncContext *s){ |
329 | 329 |
/*FIXME s->no_rounding b_type*/ |
330 | 330 |
if(s->flags&CODEC_FLAG_QPEL){ |
331 | 331 |
c->sub_motion_search= qpel_motion_search; |
332 |
- c->qpel_avg= s->dsp.avg_qpel_pixels_tab; |
|
333 |
- if(s->no_rounding) c->qpel_put= s->dsp.put_no_rnd_qpel_pixels_tab; |
|
334 |
- else c->qpel_put= s->dsp.put_qpel_pixels_tab; |
|
332 |
+ c->qpel_avg = s->qdsp.avg_qpel_pixels_tab; |
|
333 |
+ if (s->no_rounding) |
|
334 |
+ c->qpel_put = s->qdsp.put_no_rnd_qpel_pixels_tab; |
|
335 |
+ else |
|
336 |
+ c->qpel_put = s->qdsp.put_qpel_pixels_tab; |
|
335 | 337 |
}else{ |
336 | 338 |
if(c->avctx->me_sub_cmp&FF_CMP_CHROMA) |
337 | 339 |
c->sub_motion_search= hpel_motion_search; |
... | ... |
@@ -622,9 +624,9 @@ static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift) |
622 | 622 |
dxy = ((my4 & 3) << 2) | (mx4 & 3); |
623 | 623 |
|
624 | 624 |
if(s->no_rounding) |
625 |
- s->dsp.put_no_rnd_qpel_pixels_tab[1][dxy](dest_y , ref , stride); |
|
625 |
+ s->qdsp.put_no_rnd_qpel_pixels_tab[1][dxy](dest_y, ref, stride); |
|
626 | 626 |
else |
627 |
- s->dsp.put_qpel_pixels_tab [1][dxy](dest_y , ref , stride); |
|
627 |
+ s->qdsp.put_qpel_pixels_tab[1][dxy](dest_y, ref, stride); |
|
628 | 628 |
}else{ |
629 | 629 |
uint8_t *ref= c->ref[block][0] + (mx4>>1) + (my4>>1)*stride; |
630 | 630 |
dxy = ((my4 & 1) << 1) | (mx4 & 1); |
... | ... |
@@ -1208,14 +1210,14 @@ static inline int check_bidir_mv(MpegEncContext * s, |
1208 | 1208 |
src_y = motion_fy >> 2; |
1209 | 1209 |
|
1210 | 1210 |
ptr = ref_data[0] + (src_y * stride) + src_x; |
1211 |
- s->dsp.put_qpel_pixels_tab[0][dxy](dest_y , ptr , stride); |
|
1211 |
+ s->qdsp.put_qpel_pixels_tab[0][dxy](dest_y, ptr, stride); |
|
1212 | 1212 |
|
1213 | 1213 |
dxy = ((motion_by & 3) << 2) | (motion_bx & 3); |
1214 | 1214 |
src_x = motion_bx >> 2; |
1215 | 1215 |
src_y = motion_by >> 2; |
1216 | 1216 |
|
1217 | 1217 |
ptr = ref2_data[0] + (src_y * stride) + src_x; |
1218 |
- s->dsp.avg_qpel_pixels_tab[size][dxy](dest_y , ptr , stride); |
|
1218 |
+ s->qdsp.avg_qpel_pixels_tab[size][dxy](dest_y, ptr, stride); |
|
1219 | 1219 |
}else{ |
1220 | 1220 |
dxy = ((motion_fy & 1) << 1) | (motion_fx & 1); |
1221 | 1221 |
src_x = motion_fx >> 1; |
... | ... |
@@ -38,6 +38,7 @@ |
38 | 38 |
#include "ratecontrol.h" |
39 | 39 |
#include "parser.h" |
40 | 40 |
#include "mpeg12data.h" |
41 |
+#include "qpeldsp.h" |
|
41 | 42 |
#include "rl.h" |
42 | 43 |
#include "thread.h" |
43 | 44 |
#include "videodsp.h" |
... | ... |
@@ -348,6 +349,7 @@ typedef struct MpegEncContext { |
348 | 348 |
|
349 | 349 |
DSPContext dsp; ///< pointers for accelerated dsp functions |
350 | 350 |
HpelDSPContext hdsp; |
351 |
+ QpelDSPContext qdsp; |
|
351 | 352 |
VideoDSPContext vdsp; |
352 | 353 |
H263DSPContext h263dsp; |
353 | 354 |
int f_code; ///< forward MV resolution |
... | ... |
@@ -46,6 +46,7 @@ |
46 | 46 |
#include "mpegutils.h" |
47 | 47 |
#include "mjpegenc.h" |
48 | 48 |
#include "msmpeg4.h" |
49 |
+#include "qpeldsp.h" |
|
49 | 50 |
#include "faandct.h" |
50 | 51 |
#include "thread.h" |
51 | 52 |
#include "aandcttab.h" |
... | ... |
@@ -687,6 +688,8 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx) |
687 | 687 |
if (ARCH_X86) |
688 | 688 |
ff_MPV_encode_init_x86(s); |
689 | 689 |
|
690 |
+ ff_qpeldsp_init(&s->qdsp); |
|
691 |
+ |
|
690 | 692 |
s->avctx->coded_frame = s->current_picture.f; |
691 | 693 |
|
692 | 694 |
if (s->msmpeg4_version) { |
... | ... |
@@ -1944,10 +1947,10 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s, |
1944 | 1944 |
|
1945 | 1945 |
if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) { |
1946 | 1946 |
op_pix = s->hdsp.put_pixels_tab; |
1947 |
- op_qpix = s->dsp.put_qpel_pixels_tab; |
|
1947 |
+ op_qpix = s->qdsp.put_qpel_pixels_tab; |
|
1948 | 1948 |
} else { |
1949 | 1949 |
op_pix = s->hdsp.put_no_rnd_pixels_tab; |
1950 |
- op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab; |
|
1950 |
+ op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab; |
|
1951 | 1951 |
} |
1952 | 1952 |
|
1953 | 1953 |
if (s->mv_dir & MV_DIR_FORWARD) { |
... | ... |
@@ -1955,7 +1958,7 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s, |
1955 | 1955 |
s->last_picture.f->data, |
1956 | 1956 |
op_pix, op_qpix); |
1957 | 1957 |
op_pix = s->hdsp.avg_pixels_tab; |
1958 |
- op_qpix = s->dsp.avg_qpel_pixels_tab; |
|
1958 |
+ op_qpix = s->qdsp.avg_qpel_pixels_tab; |
|
1959 | 1959 |
} |
1960 | 1960 |
if (s->mv_dir & MV_DIR_BACKWARD) { |
1961 | 1961 |
ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1, |
... | ... |
@@ -27,6 +27,7 @@ |
27 | 27 |
#include "error_resilience.h" |
28 | 28 |
#include "internal.h" |
29 | 29 |
#include "msmpeg4data.h" |
30 |
+#include "qpeldsp.h" |
|
30 | 31 |
#include "vc1.h" |
31 | 32 |
#include "mss12.h" |
32 | 33 |
#include "mss2dsp.h" |
... | ... |
@@ -37,6 +38,7 @@ typedef struct MSS2Context { |
37 | 37 |
AVFrame *last_pic; |
38 | 38 |
MSS12Context c; |
39 | 39 |
MSS2DSPContext dsp; |
40 |
+ QpelDSPContext qdsp; |
|
40 | 41 |
SliceContext sc[2]; |
41 | 42 |
} MSS2Context; |
42 | 43 |
|
... | ... |
@@ -787,8 +789,8 @@ static av_cold int wmv9_init(AVCodecContext *avctx) |
787 | 787 |
return ret; |
788 | 788 |
|
789 | 789 |
/* error concealment */ |
790 |
- v->s.me.qpel_put = v->s.dsp.put_qpel_pixels_tab; |
|
791 |
- v->s.me.qpel_avg = v->s.dsp.avg_qpel_pixels_tab; |
|
790 |
+ v->s.me.qpel_put = v->s.qdsp.put_qpel_pixels_tab; |
|
791 |
+ v->s.me.qpel_avg = v->s.qdsp.avg_qpel_pixels_tab; |
|
792 | 792 |
|
793 | 793 |
return 0; |
794 | 794 |
} |
... | ... |
@@ -827,6 +829,7 @@ static av_cold int mss2_decode_init(AVCodecContext *avctx) |
827 | 827 |
return ret; |
828 | 828 |
} |
829 | 829 |
ff_mss2dsp_init(&ctx->dsp); |
830 |
+ ff_qpeldsp_init(&ctx->qdsp); |
|
830 | 831 |
|
831 | 832 |
avctx->pix_fmt = c->free_colours == 127 ? AV_PIX_FMT_RGB555 |
832 | 833 |
: AV_PIX_FMT_RGB24; |
833 | 834 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,219 @@ |
0 |
+/* |
|
1 |
+ * quarterpel DSP function templates |
|
2 |
+ * |
|
3 |
+ * This file is part of Libav. |
|
4 |
+ * |
|
5 |
+ * Libav is free software; you can redistribute it and/or |
|
6 |
+ * modify it under the terms of the GNU Lesser General Public |
|
7 |
+ * License as published by the Free Software Foundation; either |
|
8 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
9 |
+ * |
|
10 |
+ * Libav is distributed in the hope that it will be useful, |
|
11 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
12 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
13 |
+ * Lesser General Public License for more details. |
|
14 |
+ * |
|
15 |
+ * You should have received a copy of the GNU Lesser General Public |
|
16 |
+ * License along with Libav; if not, write to the Free Software |
|
17 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
18 |
+ */ |
|
19 |
+ |
|
20 |
+/** |
|
21 |
+ * @file |
|
22 |
+ * quarterpel DSP function templates |
|
23 |
+ */ |
|
24 |
+ |
|
25 |
+#define PIXOP2(OPNAME, OP) \ |
|
26 |
+static inline void OPNAME ## _no_rnd_pixels8_l2_8(uint8_t *dst, \ |
|
27 |
+ const uint8_t *src1, \ |
|
28 |
+ const uint8_t *src2, \ |
|
29 |
+ int dst_stride, \ |
|
30 |
+ int src_stride1, \ |
|
31 |
+ int src_stride2, \ |
|
32 |
+ int h) \ |
|
33 |
+{ \ |
|
34 |
+ int i; \ |
|
35 |
+ \ |
|
36 |
+ for (i = 0; i < h; i++) { \ |
|
37 |
+ uint32_t a, b; \ |
|
38 |
+ a = AV_RN32(&src1[i * src_stride1]); \ |
|
39 |
+ b = AV_RN32(&src2[i * src_stride2]); \ |
|
40 |
+ OP(*((uint32_t *) &dst[i * dst_stride]), \ |
|
41 |
+ no_rnd_avg32(a, b)); \ |
|
42 |
+ a = AV_RN32(&src1[i * src_stride1 + 4]); \ |
|
43 |
+ b = AV_RN32(&src2[i * src_stride2 + 4]); \ |
|
44 |
+ OP(*((uint32_t *) &dst[i * dst_stride + 4]), \ |
|
45 |
+ no_rnd_avg32(a, b)); \ |
|
46 |
+ } \ |
|
47 |
+} \ |
|
48 |
+ \ |
|
49 |
+static inline void OPNAME ## _no_rnd_pixels16_l2_8(uint8_t *dst, \ |
|
50 |
+ const uint8_t *src1, \ |
|
51 |
+ const uint8_t *src2, \ |
|
52 |
+ int dst_stride, \ |
|
53 |
+ int src_stride1, \ |
|
54 |
+ int src_stride2, \ |
|
55 |
+ int h) \ |
|
56 |
+{ \ |
|
57 |
+ OPNAME ## _no_rnd_pixels8_l2_8(dst, src1, src2, dst_stride, \ |
|
58 |
+ src_stride1, src_stride2, h); \ |
|
59 |
+ OPNAME ## _no_rnd_pixels8_l2_8(dst + 8, \ |
|
60 |
+ src1 + 8, \ |
|
61 |
+ src2 + 8, \ |
|
62 |
+ dst_stride, src_stride1, \ |
|
63 |
+ src_stride2, h); \ |
|
64 |
+} \ |
|
65 |
+ \ |
|
66 |
+static inline void OPNAME ## _pixels8_l4_8(uint8_t *dst, \ |
|
67 |
+ const uint8_t *src1, \ |
|
68 |
+ const uint8_t *src2, \ |
|
69 |
+ const uint8_t *src3, \ |
|
70 |
+ const uint8_t *src4, \ |
|
71 |
+ int dst_stride, \ |
|
72 |
+ int src_stride1, \ |
|
73 |
+ int src_stride2, \ |
|
74 |
+ int src_stride3, \ |
|
75 |
+ int src_stride4, \ |
|
76 |
+ int h) \ |
|
77 |
+{ \ |
|
78 |
+ /* FIXME HIGH BIT DEPTH */ \ |
|
79 |
+ int i; \ |
|
80 |
+ \ |
|
81 |
+ for (i = 0; i < h; i++) { \ |
|
82 |
+ uint32_t a, b, c, d, l0, l1, h0, h1; \ |
|
83 |
+ a = AV_RN32(&src1[i * src_stride1]); \ |
|
84 |
+ b = AV_RN32(&src2[i * src_stride2]); \ |
|
85 |
+ c = AV_RN32(&src3[i * src_stride3]); \ |
|
86 |
+ d = AV_RN32(&src4[i * src_stride4]); \ |
|
87 |
+ l0 = (a & 0x03030303UL) + \ |
|
88 |
+ (b & 0x03030303UL) + \ |
|
89 |
+ 0x02020202UL; \ |
|
90 |
+ h0 = ((a & 0xFCFCFCFCUL) >> 2) + \ |
|
91 |
+ ((b & 0xFCFCFCFCUL) >> 2); \ |
|
92 |
+ l1 = (c & 0x03030303UL) + \ |
|
93 |
+ (d & 0x03030303UL); \ |
|
94 |
+ h1 = ((c & 0xFCFCFCFCUL) >> 2) + \ |
|
95 |
+ ((d & 0xFCFCFCFCUL) >> 2); \ |
|
96 |
+ OP(*((uint32_t *) &dst[i * dst_stride]), \ |
|
97 |
+ h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \ |
|
98 |
+ a = AV_RN32(&src1[i * src_stride1 + 4]); \ |
|
99 |
+ b = AV_RN32(&src2[i * src_stride2 + 4]); \ |
|
100 |
+ c = AV_RN32(&src3[i * src_stride3 + 4]); \ |
|
101 |
+ d = AV_RN32(&src4[i * src_stride4 + 4]); \ |
|
102 |
+ l0 = (a & 0x03030303UL) + \ |
|
103 |
+ (b & 0x03030303UL) + \ |
|
104 |
+ 0x02020202UL; \ |
|
105 |
+ h0 = ((a & 0xFCFCFCFCUL) >> 2) + \ |
|
106 |
+ ((b & 0xFCFCFCFCUL) >> 2); \ |
|
107 |
+ l1 = (c & 0x03030303UL) + \ |
|
108 |
+ (d & 0x03030303UL); \ |
|
109 |
+ h1 = ((c & 0xFCFCFCFCUL) >> 2) + \ |
|
110 |
+ ((d & 0xFCFCFCFCUL) >> 2); \ |
|
111 |
+ OP(*((uint32_t *) &dst[i * dst_stride + 4]), \ |
|
112 |
+ h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \ |
|
113 |
+ } \ |
|
114 |
+} \ |
|
115 |
+ \ |
|
116 |
+static inline void OPNAME ## _no_rnd_pixels8_l4_8(uint8_t *dst, \ |
|
117 |
+ const uint8_t *src1, \ |
|
118 |
+ const uint8_t *src2, \ |
|
119 |
+ const uint8_t *src3, \ |
|
120 |
+ const uint8_t *src4, \ |
|
121 |
+ int dst_stride, \ |
|
122 |
+ int src_stride1, \ |
|
123 |
+ int src_stride2, \ |
|
124 |
+ int src_stride3, \ |
|
125 |
+ int src_stride4, \ |
|
126 |
+ int h) \ |
|
127 |
+{ \ |
|
128 |
+ /* FIXME HIGH BIT DEPTH */ \ |
|
129 |
+ int i; \ |
|
130 |
+ \ |
|
131 |
+ for (i = 0; i < h; i++) { \ |
|
132 |
+ uint32_t a, b, c, d, l0, l1, h0, h1; \ |
|
133 |
+ a = AV_RN32(&src1[i * src_stride1]); \ |
|
134 |
+ b = AV_RN32(&src2[i * src_stride2]); \ |
|
135 |
+ c = AV_RN32(&src3[i * src_stride3]); \ |
|
136 |
+ d = AV_RN32(&src4[i * src_stride4]); \ |
|
137 |
+ l0 = (a & 0x03030303UL) + \ |
|
138 |
+ (b & 0x03030303UL) + \ |
|
139 |
+ 0x01010101UL; \ |
|
140 |
+ h0 = ((a & 0xFCFCFCFCUL) >> 2) + \ |
|
141 |
+ ((b & 0xFCFCFCFCUL) >> 2); \ |
|
142 |
+ l1 = (c & 0x03030303UL) + \ |
|
143 |
+ (d & 0x03030303UL); \ |
|
144 |
+ h1 = ((c & 0xFCFCFCFCUL) >> 2) + \ |
|
145 |
+ ((d & 0xFCFCFCFCUL) >> 2); \ |
|
146 |
+ OP(*((uint32_t *) &dst[i * dst_stride]), \ |
|
147 |
+ h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \ |
|
148 |
+ a = AV_RN32(&src1[i * src_stride1 + 4]); \ |
|
149 |
+ b = AV_RN32(&src2[i * src_stride2 + 4]); \ |
|
150 |
+ c = AV_RN32(&src3[i * src_stride3 + 4]); \ |
|
151 |
+ d = AV_RN32(&src4[i * src_stride4 + 4]); \ |
|
152 |
+ l0 = (a & 0x03030303UL) + \ |
|
153 |
+ (b & 0x03030303UL) + \ |
|
154 |
+ 0x01010101UL; \ |
|
155 |
+ h0 = ((a & 0xFCFCFCFCUL) >> 2) + \ |
|
156 |
+ ((b & 0xFCFCFCFCUL) >> 2); \ |
|
157 |
+ l1 = (c & 0x03030303UL) + \ |
|
158 |
+ (d & 0x03030303UL); \ |
|
159 |
+ h1 = ((c & 0xFCFCFCFCUL) >> 2) + \ |
|
160 |
+ ((d & 0xFCFCFCFCUL) >> 2); \ |
|
161 |
+ OP(*((uint32_t *) &dst[i * dst_stride + 4]), \ |
|
162 |
+ h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \ |
|
163 |
+ } \ |
|
164 |
+} \ |
|
165 |
+ \ |
|
166 |
+static inline void OPNAME ## _pixels16_l4_8(uint8_t *dst, \ |
|
167 |
+ const uint8_t *src1, \ |
|
168 |
+ const uint8_t *src2, \ |
|
169 |
+ const uint8_t *src3, \ |
|
170 |
+ const uint8_t *src4, \ |
|
171 |
+ int dst_stride, \ |
|
172 |
+ int src_stride1, \ |
|
173 |
+ int src_stride2, \ |
|
174 |
+ int src_stride3, \ |
|
175 |
+ int src_stride4, \ |
|
176 |
+ int h) \ |
|
177 |
+{ \ |
|
178 |
+ OPNAME ## _pixels8_l4_8(dst, src1, src2, src3, src4, dst_stride, \ |
|
179 |
+ src_stride1, src_stride2, src_stride3, \ |
|
180 |
+ src_stride4, h); \ |
|
181 |
+ OPNAME ## _pixels8_l4_8(dst + 8, \ |
|
182 |
+ src1 + 8, src2 + 8, \ |
|
183 |
+ src3 + 8, src4 + 8, \ |
|
184 |
+ dst_stride, src_stride1, src_stride2, \ |
|
185 |
+ src_stride3, src_stride4, h); \ |
|
186 |
+} \ |
|
187 |
+ \ |
|
188 |
+static inline void OPNAME ## _no_rnd_pixels16_l4_8(uint8_t *dst, \ |
|
189 |
+ const uint8_t *src1, \ |
|
190 |
+ const uint8_t *src2, \ |
|
191 |
+ const uint8_t *src3, \ |
|
192 |
+ const uint8_t *src4, \ |
|
193 |
+ int dst_stride, \ |
|
194 |
+ int src_stride1, \ |
|
195 |
+ int src_stride2, \ |
|
196 |
+ int src_stride3, \ |
|
197 |
+ int src_stride4, \ |
|
198 |
+ int h) \ |
|
199 |
+{ \ |
|
200 |
+ OPNAME ## _no_rnd_pixels8_l4_8(dst, src1, src2, src3, src4, \ |
|
201 |
+ dst_stride, src_stride1, \ |
|
202 |
+ src_stride2, src_stride3, \ |
|
203 |
+ src_stride4, h); \ |
|
204 |
+ OPNAME ## _no_rnd_pixels8_l4_8(dst + 8, \ |
|
205 |
+ src1 + 8, src2 + 8, \ |
|
206 |
+ src3 + 8, src4 + 8, \ |
|
207 |
+ dst_stride, src_stride1, \ |
|
208 |
+ src_stride2, src_stride3, \ |
|
209 |
+ src_stride4, h); \ |
|
210 |
+} \ |
|
211 |
+ |
|
212 |
+#define op_avg(a, b) a = rnd_avg32(a, b) |
|
213 |
+#define op_put(a, b) a = b |
|
214 |
+#define put_no_rnd_pixels8_8_c put_pixels8_8_c |
|
215 |
+PIXOP2(avg, op_avg) |
|
216 |
+PIXOP2(put, op_put) |
|
217 |
+#undef op_avg |
|
218 |
+#undef op_put |
0 | 219 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,764 @@ |
0 |
+/* |
|
1 |
+ * quarterpel DSP functions |
|
2 |
+ * |
|
3 |
+ * This file is part of Libav. |
|
4 |
+ * |
|
5 |
+ * Libav is free software; you can redistribute it and/or |
|
6 |
+ * modify it under the terms of the GNU Lesser General Public |
|
7 |
+ * License as published by the Free Software Foundation; either |
|
8 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
9 |
+ * |
|
10 |
+ * Libav is distributed in the hope that it will be useful, |
|
11 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
12 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
13 |
+ * Lesser General Public License for more details. |
|
14 |
+ * |
|
15 |
+ * You should have received a copy of the GNU Lesser General Public |
|
16 |
+ * License along with Libav; if not, write to the Free Software |
|
17 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
18 |
+ */ |
|
19 |
+ |
|
20 |
+/** |
|
21 |
+ * @file |
|
22 |
+ * quarterpel DSP functions |
|
23 |
+ */ |
|
24 |
+ |
|
25 |
+#include <stddef.h> |
|
26 |
+#include <stdint.h> |
|
27 |
+ |
|
28 |
+#include "config.h" |
|
29 |
+#include "libavutil/attributes.h" |
|
30 |
+#include "copy_block.h" |
|
31 |
+#include "qpeldsp.h" |
|
32 |
+ |
|
33 |
+#define BIT_DEPTH 8 |
|
34 |
+#include "hpel_template.c" |
|
35 |
+#include "tpel_template.c" |
|
36 |
+#include "qpel_template.c" |
|
37 |
+ |
|
38 |
+#define QPEL_MC(r, OPNAME, RND, OP) \ |
|
39 |
+static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, \ |
|
40 |
+ int dstStride, int srcStride, \ |
|
41 |
+ int h) \ |
|
42 |
+{ \ |
|
43 |
+ const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ |
|
44 |
+ int i; \ |
|
45 |
+ \ |
|
46 |
+ for (i = 0; i < h; i++) { \ |
|
47 |
+ OP(dst[0], (src[0] + src[1]) * 20 - (src[0] + src[2]) * 6 + (src[1] + src[3]) * 3 - (src[2] + src[4])); \ |
|
48 |
+ OP(dst[1], (src[1] + src[2]) * 20 - (src[0] + src[3]) * 6 + (src[0] + src[4]) * 3 - (src[1] + src[5])); \ |
|
49 |
+ OP(dst[2], (src[2] + src[3]) * 20 - (src[1] + src[4]) * 6 + (src[0] + src[5]) * 3 - (src[0] + src[6])); \ |
|
50 |
+ OP(dst[3], (src[3] + src[4]) * 20 - (src[2] + src[5]) * 6 + (src[1] + src[6]) * 3 - (src[0] + src[7])); \ |
|
51 |
+ OP(dst[4], (src[4] + src[5]) * 20 - (src[3] + src[6]) * 6 + (src[2] + src[7]) * 3 - (src[1] + src[8])); \ |
|
52 |
+ OP(dst[5], (src[5] + src[6]) * 20 - (src[4] + src[7]) * 6 + (src[3] + src[8]) * 3 - (src[2] + src[8])); \ |
|
53 |
+ OP(dst[6], (src[6] + src[7]) * 20 - (src[5] + src[8]) * 6 + (src[4] + src[8]) * 3 - (src[3] + src[7])); \ |
|
54 |
+ OP(dst[7], (src[7] + src[8]) * 20 - (src[6] + src[8]) * 6 + (src[5] + src[7]) * 3 - (src[4] + src[6])); \ |
|
55 |
+ dst += dstStride; \ |
|
56 |
+ src += srcStride; \ |
|
57 |
+ } \ |
|
58 |
+} \ |
|
59 |
+ \ |
|
60 |
+static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, \ |
|
61 |
+ int dstStride, int srcStride) \ |
|
62 |
+{ \ |
|
63 |
+ const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ |
|
64 |
+ const int w = 8; \ |
|
65 |
+ int i; \ |
|
66 |
+ \ |
|
67 |
+ for (i = 0; i < w; i++) { \ |
|
68 |
+ const int src0 = src[0 * srcStride]; \ |
|
69 |
+ const int src1 = src[1 * srcStride]; \ |
|
70 |
+ const int src2 = src[2 * srcStride]; \ |
|
71 |
+ const int src3 = src[3 * srcStride]; \ |
|
72 |
+ const int src4 = src[4 * srcStride]; \ |
|
73 |
+ const int src5 = src[5 * srcStride]; \ |
|
74 |
+ const int src6 = src[6 * srcStride]; \ |
|
75 |
+ const int src7 = src[7 * srcStride]; \ |
|
76 |
+ const int src8 = src[8 * srcStride]; \ |
|
77 |
+ OP(dst[0 * dstStride], (src0 + src1) * 20 - (src0 + src2) * 6 + (src1 + src3) * 3 - (src2 + src4)); \ |
|
78 |
+ OP(dst[1 * dstStride], (src1 + src2) * 20 - (src0 + src3) * 6 + (src0 + src4) * 3 - (src1 + src5)); \ |
|
79 |
+ OP(dst[2 * dstStride], (src2 + src3) * 20 - (src1 + src4) * 6 + (src0 + src5) * 3 - (src0 + src6)); \ |
|
80 |
+ OP(dst[3 * dstStride], (src3 + src4) * 20 - (src2 + src5) * 6 + (src1 + src6) * 3 - (src0 + src7)); \ |
|
81 |
+ OP(dst[4 * dstStride], (src4 + src5) * 20 - (src3 + src6) * 6 + (src2 + src7) * 3 - (src1 + src8)); \ |
|
82 |
+ OP(dst[5 * dstStride], (src5 + src6) * 20 - (src4 + src7) * 6 + (src3 + src8) * 3 - (src2 + src8)); \ |
|
83 |
+ OP(dst[6 * dstStride], (src6 + src7) * 20 - (src5 + src8) * 6 + (src4 + src8) * 3 - (src3 + src7)); \ |
|
84 |
+ OP(dst[7 * dstStride], (src7 + src8) * 20 - (src6 + src8) * 6 + (src5 + src7) * 3 - (src4 + src6)); \ |
|
85 |
+ dst++; \ |
|
86 |
+ src++; \ |
|
87 |
+ } \ |
|
88 |
+} \ |
|
89 |
+ \ |
|
90 |
+static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, \ |
|
91 |
+ int dstStride, int srcStride, \ |
|
92 |
+ int h) \ |
|
93 |
+{ \ |
|
94 |
+ const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ |
|
95 |
+ int i; \ |
|
96 |
+ \ |
|
97 |
+ for (i = 0; i < h; i++) { \ |
|
98 |
+ OP(dst[0], (src[0] + src[1]) * 20 - (src[0] + src[2]) * 6 + (src[1] + src[3]) * 3 - (src[2] + src[4])); \ |
|
99 |
+ OP(dst[1], (src[1] + src[2]) * 20 - (src[0] + src[3]) * 6 + (src[0] + src[4]) * 3 - (src[1] + src[5])); \ |
|
100 |
+ OP(dst[2], (src[2] + src[3]) * 20 - (src[1] + src[4]) * 6 + (src[0] + src[5]) * 3 - (src[0] + src[6])); \ |
|
101 |
+ OP(dst[3], (src[3] + src[4]) * 20 - (src[2] + src[5]) * 6 + (src[1] + src[6]) * 3 - (src[0] + src[7])); \ |
|
102 |
+ OP(dst[4], (src[4] + src[5]) * 20 - (src[3] + src[6]) * 6 + (src[2] + src[7]) * 3 - (src[1] + src[8])); \ |
|
103 |
+ OP(dst[5], (src[5] + src[6]) * 20 - (src[4] + src[7]) * 6 + (src[3] + src[8]) * 3 - (src[2] + src[9])); \ |
|
104 |
+ OP(dst[6], (src[6] + src[7]) * 20 - (src[5] + src[8]) * 6 + (src[4] + src[9]) * 3 - (src[3] + src[10])); \ |
|
105 |
+ OP(dst[7], (src[7] + src[8]) * 20 - (src[6] + src[9]) * 6 + (src[5] + src[10]) * 3 - (src[4] + src[11])); \ |
|
106 |
+ OP(dst[8], (src[8] + src[9]) * 20 - (src[7] + src[10]) * 6 + (src[6] + src[11]) * 3 - (src[5] + src[12])); \ |
|
107 |
+ OP(dst[9], (src[9] + src[10]) * 20 - (src[8] + src[11]) * 6 + (src[7] + src[12]) * 3 - (src[6] + src[13])); \ |
|
108 |
+ OP(dst[10], (src[10] + src[11]) * 20 - (src[9] + src[12]) * 6 + (src[8] + src[13]) * 3 - (src[7] + src[14])); \ |
|
109 |
+ OP(dst[11], (src[11] + src[12]) * 20 - (src[10] + src[13]) * 6 + (src[9] + src[14]) * 3 - (src[8] + src[15])); \ |
|
110 |
+ OP(dst[12], (src[12] + src[13]) * 20 - (src[11] + src[14]) * 6 + (src[10] + src[15]) * 3 - (src[9] + src[16])); \ |
|
111 |
+ OP(dst[13], (src[13] + src[14]) * 20 - (src[12] + src[15]) * 6 + (src[11] + src[16]) * 3 - (src[10] + src[16])); \ |
|
112 |
+ OP(dst[14], (src[14] + src[15]) * 20 - (src[13] + src[16]) * 6 + (src[12] + src[16]) * 3 - (src[11] + src[15])); \ |
|
113 |
+ OP(dst[15], (src[15] + src[16]) * 20 - (src[14] + src[16]) * 6 + (src[13] + src[15]) * 3 - (src[12] + src[14])); \ |
|
114 |
+ dst += dstStride; \ |
|
115 |
+ src += srcStride; \ |
|
116 |
+ } \ |
|
117 |
+} \ |
|
118 |
+ \ |
|
119 |
+static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, \ |
|
120 |
+ int dstStride, int srcStride) \ |
|
121 |
+{ \ |
|
122 |
+ const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ |
|
123 |
+ const int w = 16; \ |
|
124 |
+ int i; \ |
|
125 |
+ \ |
|
126 |
+ for (i = 0; i < w; i++) { \ |
|
127 |
+ const int src0 = src[0 * srcStride]; \ |
|
128 |
+ const int src1 = src[1 * srcStride]; \ |
|
129 |
+ const int src2 = src[2 * srcStride]; \ |
|
130 |
+ const int src3 = src[3 * srcStride]; \ |
|
131 |
+ const int src4 = src[4 * srcStride]; \ |
|
132 |
+ const int src5 = src[5 * srcStride]; \ |
|
133 |
+ const int src6 = src[6 * srcStride]; \ |
|
134 |
+ const int src7 = src[7 * srcStride]; \ |
|
135 |
+ const int src8 = src[8 * srcStride]; \ |
|
136 |
+ const int src9 = src[9 * srcStride]; \ |
|
137 |
+ const int src10 = src[10 * srcStride]; \ |
|
138 |
+ const int src11 = src[11 * srcStride]; \ |
|
139 |
+ const int src12 = src[12 * srcStride]; \ |
|
140 |
+ const int src13 = src[13 * srcStride]; \ |
|
141 |
+ const int src14 = src[14 * srcStride]; \ |
|
142 |
+ const int src15 = src[15 * srcStride]; \ |
|
143 |
+ const int src16 = src[16 * srcStride]; \ |
|
144 |
+ OP(dst[0 * dstStride], (src0 + src1) * 20 - (src0 + src2) * 6 + (src1 + src3) * 3 - (src2 + src4)); \ |
|
145 |
+ OP(dst[1 * dstStride], (src1 + src2) * 20 - (src0 + src3) * 6 + (src0 + src4) * 3 - (src1 + src5)); \ |
|
146 |
+ OP(dst[2 * dstStride], (src2 + src3) * 20 - (src1 + src4) * 6 + (src0 + src5) * 3 - (src0 + src6)); \ |
|
147 |
+ OP(dst[3 * dstStride], (src3 + src4) * 20 - (src2 + src5) * 6 + (src1 + src6) * 3 - (src0 + src7)); \ |
|
148 |
+ OP(dst[4 * dstStride], (src4 + src5) * 20 - (src3 + src6) * 6 + (src2 + src7) * 3 - (src1 + src8)); \ |
|
149 |
+ OP(dst[5 * dstStride], (src5 + src6) * 20 - (src4 + src7) * 6 + (src3 + src8) * 3 - (src2 + src9)); \ |
|
150 |
+ OP(dst[6 * dstStride], (src6 + src7) * 20 - (src5 + src8) * 6 + (src4 + src9) * 3 - (src3 + src10)); \ |
|
151 |
+ OP(dst[7 * dstStride], (src7 + src8) * 20 - (src6 + src9) * 6 + (src5 + src10) * 3 - (src4 + src11)); \ |
|
152 |
+ OP(dst[8 * dstStride], (src8 + src9) * 20 - (src7 + src10) * 6 + (src6 + src11) * 3 - (src5 + src12)); \ |
|
153 |
+ OP(dst[9 * dstStride], (src9 + src10) * 20 - (src8 + src11) * 6 + (src7 + src12) * 3 - (src6 + src13)); \ |
|
154 |
+ OP(dst[10 * dstStride], (src10 + src11) * 20 - (src9 + src12) * 6 + (src8 + src13) * 3 - (src7 + src14)); \ |
|
155 |
+ OP(dst[11 * dstStride], (src11 + src12) * 20 - (src10 + src13) * 6 + (src9 + src14) * 3 - (src8 + src15)); \ |
|
156 |
+ OP(dst[12 * dstStride], (src12 + src13) * 20 - (src11 + src14) * 6 + (src10 + src15) * 3 - (src9 + src16)); \ |
|
157 |
+ OP(dst[13 * dstStride], (src13 + src14) * 20 - (src12 + src15) * 6 + (src11 + src16) * 3 - (src10 + src16)); \ |
|
158 |
+ OP(dst[14 * dstStride], (src14 + src15) * 20 - (src13 + src16) * 6 + (src12 + src16) * 3 - (src11 + src15)); \ |
|
159 |
+ OP(dst[15 * dstStride], (src15 + src16) * 20 - (src14 + src16) * 6 + (src13 + src15) * 3 - (src12 + src14)); \ |
|
160 |
+ dst++; \ |
|
161 |
+ src++; \ |
|
162 |
+ } \ |
|
163 |
+} \ |
|
164 |
+ \ |
|
165 |
+static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, \ |
|
166 |
+ ptrdiff_t stride) \ |
|
167 |
+{ \ |
|
168 |
+ uint8_t half[64]; \ |
|
169 |
+ \ |
|
170 |
+ put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8); \ |
|
171 |
+ OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8); \ |
|
172 |
+} \ |
|
173 |
+ \ |
|
174 |
+static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, \ |
|
175 |
+ ptrdiff_t stride) \ |
|
176 |
+{ \ |
|
177 |
+ OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8); \ |
|
178 |
+} \ |
|
179 |
+ \ |
|
180 |
+static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, \ |
|
181 |
+ ptrdiff_t stride) \ |
|
182 |
+{ \ |
|
183 |
+ uint8_t half[64]; \ |
|
184 |
+ \ |
|
185 |
+ put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8); \ |
|
186 |
+ OPNAME ## pixels8_l2_8(dst, src + 1, half, stride, stride, 8, 8); \ |
|
187 |
+} \ |
|
188 |
+ \ |
|
189 |
+static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, \ |
|
190 |
+ ptrdiff_t stride) \ |
|
191 |
+{ \ |
|
192 |
+ uint8_t full[16 * 9]; \ |
|
193 |
+ uint8_t half[64]; \ |
|
194 |
+ \ |
|
195 |
+ copy_block9(full, src, 16, stride, 9); \ |
|
196 |
+ put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16); \ |
|
197 |
+ OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8); \ |
|
198 |
+} \ |
|
199 |
+ \ |
|
200 |
+static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, \ |
|
201 |
+ ptrdiff_t stride) \ |
|
202 |
+{ \ |
|
203 |
+ uint8_t full[16 * 9]; \ |
|
204 |
+ \ |
|
205 |
+ copy_block9(full, src, 16, stride, 9); \ |
|
206 |
+ OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16); \ |
|
207 |
+} \ |
|
208 |
+ \ |
|
209 |
+static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, \ |
|
210 |
+ ptrdiff_t stride) \ |
|
211 |
+{ \ |
|
212 |
+ uint8_t full[16 * 9]; \ |
|
213 |
+ uint8_t half[64]; \ |
|
214 |
+ \ |
|
215 |
+ copy_block9(full, src, 16, stride, 9); \ |
|
216 |
+ put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16); \ |
|
217 |
+ OPNAME ## pixels8_l2_8(dst, full + 16, half, stride, 16, 8, 8); \ |
|
218 |
+} \ |
|
219 |
+ \ |
|
220 |
+void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, \ |
|
221 |
+ ptrdiff_t stride) \ |
|
222 |
+{ \ |
|
223 |
+ uint8_t full[16 * 9]; \ |
|
224 |
+ uint8_t halfH[72]; \ |
|
225 |
+ uint8_t halfV[64]; \ |
|
226 |
+ uint8_t halfHV[64]; \ |
|
227 |
+ \ |
|
228 |
+ copy_block9(full, src, 16, stride, 9); \ |
|
229 |
+ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \ |
|
230 |
+ put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16); \ |
|
231 |
+ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \ |
|
232 |
+ OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, \ |
|
233 |
+ stride, 16, 8, 8, 8, 8); \ |
|
234 |
+} \ |
|
235 |
+ \ |
|
236 |
+static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, \ |
|
237 |
+ ptrdiff_t stride) \ |
|
238 |
+{ \ |
|
239 |
+ uint8_t full[16 * 9]; \ |
|
240 |
+ uint8_t halfH[72]; \ |
|
241 |
+ uint8_t halfHV[64]; \ |
|
242 |
+ \ |
|
243 |
+ copy_block9(full, src, 16, stride, 9); \ |
|
244 |
+ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \ |
|
245 |
+ put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9); \ |
|
246 |
+ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \ |
|
247 |
+ OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8); \ |
|
248 |
+} \ |
|
249 |
+ \ |
|
250 |
+void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, \ |
|
251 |
+ ptrdiff_t stride) \ |
|
252 |
+{ \ |
|
253 |
+ uint8_t full[16 * 9]; \ |
|
254 |
+ uint8_t halfH[72]; \ |
|
255 |
+ uint8_t halfV[64]; \ |
|
256 |
+ uint8_t halfHV[64]; \ |
|
257 |
+ \ |
|
258 |
+ copy_block9(full, src, 16, stride, 9); \ |
|
259 |
+ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \ |
|
260 |
+ put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full + 1, 8, 16); \ |
|
261 |
+ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \ |
|
262 |
+ OPNAME ## pixels8_l4_8(dst, full + 1, halfH, halfV, halfHV, \ |
|
263 |
+ stride, 16, 8, 8, 8, 8); \ |
|
264 |
+} \ |
|
265 |
+ \ |
|
266 |
+static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, \ |
|
267 |
+ ptrdiff_t stride) \ |
|
268 |
+{ \ |
|
269 |
+ uint8_t full[16 * 9]; \ |
|
270 |
+ uint8_t halfH[72]; \ |
|
271 |
+ uint8_t halfHV[64]; \ |
|
272 |
+ \ |
|
273 |
+ copy_block9(full, src, 16, stride, 9); \ |
|
274 |
+ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \ |
|
275 |
+ put ## RND ## pixels8_l2_8(halfH, halfH, full + 1, 8, 8, 16, 9); \ |
|
276 |
+ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \ |
|
277 |
+ OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8); \ |
|
278 |
+} \ |
|
279 |
+ \ |
|
280 |
+void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, \ |
|
281 |
+ ptrdiff_t stride) \ |
|
282 |
+{ \ |
|
283 |
+ uint8_t full[16 * 9]; \ |
|
284 |
+ uint8_t halfH[72]; \ |
|
285 |
+ uint8_t halfV[64]; \ |
|
286 |
+ uint8_t halfHV[64]; \ |
|
287 |
+ \ |
|
288 |
+ copy_block9(full, src, 16, stride, 9); \ |
|
289 |
+ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \ |
|
290 |
+ put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16); \ |
|
291 |
+ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \ |
|
292 |
+ OPNAME ## pixels8_l4_8(dst, full + 16, halfH + 8, halfV, halfHV, \ |
|
293 |
+ stride, 16, 8, 8, 8, 8); \ |
|
294 |
+} \ |
|
295 |
+ \ |
|
296 |
+static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, \ |
|
297 |
+ ptrdiff_t stride) \ |
|
298 |
+{ \ |
|
299 |
+ uint8_t full[16 * 9]; \ |
|
300 |
+ uint8_t halfH[72]; \ |
|
301 |
+ uint8_t halfHV[64]; \ |
|
302 |
+ \ |
|
303 |
+ copy_block9(full, src, 16, stride, 9); \ |
|
304 |
+ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \ |
|
305 |
+ put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9); \ |
|
306 |
+ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \ |
|
307 |
+ OPNAME ## pixels8_l2_8(dst, halfH + 8, halfHV, stride, 8, 8, 8); \ |
|
308 |
+} \ |
|
309 |
+ \ |
|
310 |
+void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, \ |
|
311 |
+ ptrdiff_t stride) \ |
|
312 |
+{ \ |
|
313 |
+ uint8_t full[16 * 9]; \ |
|
314 |
+ uint8_t halfH[72]; \ |
|
315 |
+ uint8_t halfV[64]; \ |
|
316 |
+ uint8_t halfHV[64]; \ |
|
317 |
+ \ |
|
318 |
+ copy_block9(full, src, 16, stride, 9); \ |
|
319 |
+ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \ |
|
320 |
+ put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full + 1, 8, 16); \ |
|
321 |
+ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \ |
|
322 |
+ OPNAME ## pixels8_l4_8(dst, full + 17, halfH + 8, halfV, halfHV, \ |
|
323 |
+ stride, 16, 8, 8, 8, 8); \ |
|
324 |
+} \ |
|
325 |
+ \ |
|
326 |
+static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, \ |
|
327 |
+ ptrdiff_t stride) \ |
|
328 |
+{ \ |
|
329 |
+ uint8_t full[16 * 9]; \ |
|
330 |
+ uint8_t halfH[72]; \ |
|
331 |
+ uint8_t halfHV[64]; \ |
|
332 |
+ \ |
|
333 |
+ copy_block9(full, src, 16, stride, 9); \ |
|
334 |
+ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \ |
|
335 |
+ put ## RND ## pixels8_l2_8(halfH, halfH, full + 1, 8, 8, 16, 9); \ |
|
336 |
+ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \ |
|
337 |
+ OPNAME ## pixels8_l2_8(dst, halfH + 8, halfHV, stride, 8, 8, 8); \ |
|
338 |
+} \ |
|
339 |
+ \ |
|
340 |
+static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, \ |
|
341 |
+ ptrdiff_t stride) \ |
|
342 |
+{ \ |
|
343 |
+ uint8_t halfH[72]; \ |
|
344 |
+ uint8_t halfHV[64]; \ |
|
345 |
+ \ |
|
346 |
+ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9); \ |
|
347 |
+ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \ |
|
348 |
+ OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8); \ |
|
349 |
+} \ |
|
350 |
+ \ |
|
351 |
+static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, \ |
|
352 |
+ ptrdiff_t stride) \ |
|
353 |
+{ \ |
|
354 |
+ uint8_t halfH[72]; \ |
|
355 |
+ uint8_t halfHV[64]; \ |
|
356 |
+ \ |
|
357 |
+ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9); \ |
|
358 |
+ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \ |
|
359 |
+ OPNAME ## pixels8_l2_8(dst, halfH + 8, halfHV, stride, 8, 8, 8); \ |
|
360 |
+} \ |
|
361 |
+ \ |
|
362 |
+void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, \ |
|
363 |
+ ptrdiff_t stride) \ |
|
364 |
+{ \ |
|
365 |
+ uint8_t full[16 * 9]; \ |
|
366 |
+ uint8_t halfH[72]; \ |
|
367 |
+ uint8_t halfV[64]; \ |
|
368 |
+ uint8_t halfHV[64]; \ |
|
369 |
+ \ |
|
370 |
+ copy_block9(full, src, 16, stride, 9); \ |
|
371 |
+ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \ |
|
372 |
+ put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16); \ |
|
373 |
+ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \ |
|
374 |
+ OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8); \ |
|
375 |
+} \ |
|
376 |
+ \ |
|
377 |
+static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, \ |
|
378 |
+ ptrdiff_t stride) \ |
|
379 |
+{ \ |
|
380 |
+ uint8_t full[16 * 9]; \ |
|
381 |
+ uint8_t halfH[72]; \ |
|
382 |
+ \ |
|
383 |
+ copy_block9(full, src, 16, stride, 9); \ |
|
384 |
+ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \ |
|
385 |
+ put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9); \ |
|
386 |
+ OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8); \ |
|
387 |
+} \ |
|
388 |
+ \ |
|
389 |
+void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, \ |
|
390 |
+ ptrdiff_t stride) \ |
|
391 |
+{ \ |
|
392 |
+ uint8_t full[16 * 9]; \ |
|
393 |
+ uint8_t halfH[72]; \ |
|
394 |
+ uint8_t halfV[64]; \ |
|
395 |
+ uint8_t halfHV[64]; \ |
|
396 |
+ \ |
|
397 |
+ copy_block9(full, src, 16, stride, 9); \ |
|
398 |
+ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \ |
|
399 |
+ put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full + 1, 8, 16); \ |
|
400 |
+ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \ |
|
401 |
+ OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8); \ |
|
402 |
+} \ |
|
403 |
+ \ |
|
404 |
+static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, \ |
|
405 |
+ ptrdiff_t stride) \ |
|
406 |
+{ \ |
|
407 |
+ uint8_t full[16 * 9]; \ |
|
408 |
+ uint8_t halfH[72]; \ |
|
409 |
+ \ |
|
410 |
+ copy_block9(full, src, 16, stride, 9); \ |
|
411 |
+ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \ |
|
412 |
+ put ## RND ## pixels8_l2_8(halfH, halfH, full + 1, 8, 8, 16, 9); \ |
|
413 |
+ OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8); \ |
|
414 |
+} \ |
|
415 |
+ \ |
|
416 |
+static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, \ |
|
417 |
+ ptrdiff_t stride) \ |
|
418 |
+{ \ |
|
419 |
+ uint8_t halfH[72]; \ |
|
420 |
+ \ |
|
421 |
+ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9); \ |
|
422 |
+ OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8); \ |
|
423 |
+} \ |
|
424 |
+ \ |
|
425 |
+static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, \ |
|
426 |
+ ptrdiff_t stride) \ |
|
427 |
+{ \ |
|
428 |
+ uint8_t half[256]; \ |
|
429 |
+ \ |
|
430 |
+ put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16); \ |
|
431 |
+ OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16); \ |
|
432 |
+} \ |
|
433 |
+ \ |
|
434 |
+static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, \ |
|
435 |
+ ptrdiff_t stride) \ |
|
436 |
+{ \ |
|
437 |
+ OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16); \ |
|
438 |
+} \ |
|
439 |
+ \ |
|
440 |
+static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, \ |
|
441 |
+ ptrdiff_t stride) \ |
|
442 |
+{ \ |
|
443 |
+ uint8_t half[256]; \ |
|
444 |
+ \ |
|
445 |
+ put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16); \ |
|
446 |
+ OPNAME ## pixels16_l2_8(dst, src + 1, half, stride, stride, 16, 16); \ |
|
447 |
+} \ |
|
448 |
+ \ |
|
449 |
+static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, \ |
|
450 |
+ ptrdiff_t stride) \ |
|
451 |
+{ \ |
|
452 |
+ uint8_t full[24 * 17]; \ |
|
453 |
+ uint8_t half[256]; \ |
|
454 |
+ \ |
|
455 |
+ copy_block17(full, src, 24, stride, 17); \ |
|
456 |
+ put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24); \ |
|
457 |
+ OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16); \ |
|
458 |
+} \ |
|
459 |
+ \ |
|
460 |
+static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, \ |
|
461 |
+ ptrdiff_t stride) \ |
|
462 |
+{ \ |
|
463 |
+ uint8_t full[24 * 17]; \ |
|
464 |
+ \ |
|
465 |
+ copy_block17(full, src, 24, stride, 17); \ |
|
466 |
+ OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24); \ |
|
467 |
+} \ |
|
468 |
+ \ |
|
469 |
+static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, \ |
|
470 |
+ ptrdiff_t stride) \ |
|
471 |
+{ \ |
|
472 |
+ uint8_t full[24 * 17]; \ |
|
473 |
+ uint8_t half[256]; \ |
|
474 |
+ \ |
|
475 |
+ copy_block17(full, src, 24, stride, 17); \ |
|
476 |
+ put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24); \ |
|
477 |
+ OPNAME ## pixels16_l2_8(dst, full + 24, half, stride, 24, 16, 16); \ |
|
478 |
+} \ |
|
479 |
+ \ |
|
480 |
+void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, \ |
|
481 |
+ ptrdiff_t stride) \ |
|
482 |
+{ \ |
|
483 |
+ uint8_t full[24 * 17]; \ |
|
484 |
+ uint8_t halfH[272]; \ |
|
485 |
+ uint8_t halfV[256]; \ |
|
486 |
+ uint8_t halfHV[256]; \ |
|
487 |
+ \ |
|
488 |
+ copy_block17(full, src, 24, stride, 17); \ |
|
489 |
+ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \ |
|
490 |
+ put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24); \ |
|
491 |
+ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \ |
|
492 |
+ OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, \ |
|
493 |
+ stride, 24, 16, 16, 16, 16); \ |
|
494 |
+} \ |
|
495 |
+ \ |
|
496 |
+static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, \ |
|
497 |
+ ptrdiff_t stride) \ |
|
498 |
+{ \ |
|
499 |
+ uint8_t full[24 * 17]; \ |
|
500 |
+ uint8_t halfH[272]; \ |
|
501 |
+ uint8_t halfHV[256]; \ |
|
502 |
+ \ |
|
503 |
+ copy_block17(full, src, 24, stride, 17); \ |
|
504 |
+ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \ |
|
505 |
+ put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17); \ |
|
506 |
+ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \ |
|
507 |
+ OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16); \ |
|
508 |
+} \ |
|
509 |
+ \ |
|
510 |
+void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, \ |
|
511 |
+ ptrdiff_t stride) \ |
|
512 |
+{ \ |
|
513 |
+ uint8_t full[24 * 17]; \ |
|
514 |
+ uint8_t halfH[272]; \ |
|
515 |
+ uint8_t halfV[256]; \ |
|
516 |
+ uint8_t halfHV[256]; \ |
|
517 |
+ \ |
|
518 |
+ copy_block17(full, src, 24, stride, 17); \ |
|
519 |
+ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \ |
|
520 |
+ put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full + 1, 16, 24); \ |
|
521 |
+ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \ |
|
522 |
+ OPNAME ## pixels16_l4_8(dst, full + 1, halfH, halfV, halfHV, \ |
|
523 |
+ stride, 24, 16, 16, 16, 16); \ |
|
524 |
+} \ |
|
525 |
+ \ |
|
526 |
+static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, \ |
|
527 |
+ ptrdiff_t stride) \ |
|
528 |
+{ \ |
|
529 |
+ uint8_t full[24 * 17]; \ |
|
530 |
+ uint8_t halfH[272]; \ |
|
531 |
+ uint8_t halfHV[256]; \ |
|
532 |
+ \ |
|
533 |
+ copy_block17(full, src, 24, stride, 17); \ |
|
534 |
+ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \ |
|
535 |
+ put ## RND ## pixels16_l2_8(halfH, halfH, full + 1, 16, 16, 24, 17); \ |
|
536 |
+ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \ |
|
537 |
+ OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16); \ |
|
538 |
+} \ |
|
539 |
+ \ |
|
540 |
+void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, \ |
|
541 |
+ ptrdiff_t stride) \ |
|
542 |
+{ \ |
|
543 |
+ uint8_t full[24 * 17]; \ |
|
544 |
+ uint8_t halfH[272]; \ |
|
545 |
+ uint8_t halfV[256]; \ |
|
546 |
+ uint8_t halfHV[256]; \ |
|
547 |
+ \ |
|
548 |
+ copy_block17(full, src, 24, stride, 17); \ |
|
549 |
+ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \ |
|
550 |
+ put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24); \ |
|
551 |
+ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \ |
|
552 |
+ OPNAME ## pixels16_l4_8(dst, full + 24, halfH + 16, halfV, halfHV, \ |
|
553 |
+ stride, 24, 16, 16, 16, 16); \ |
|
554 |
+} \ |
|
555 |
+ \ |
|
556 |
+static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, \ |
|
557 |
+ ptrdiff_t stride) \ |
|
558 |
+{ \ |
|
559 |
+ uint8_t full[24 * 17]; \ |
|
560 |
+ uint8_t halfH[272]; \ |
|
561 |
+ uint8_t halfHV[256]; \ |
|
562 |
+ \ |
|
563 |
+ copy_block17(full, src, 24, stride, 17); \ |
|
564 |
+ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \ |
|
565 |
+ put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17); \ |
|
566 |
+ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \ |
|
567 |
+ OPNAME ## pixels16_l2_8(dst, halfH + 16, halfHV, stride, 16, 16, 16); \ |
|
568 |
+} \ |
|
569 |
+ \ |
|
570 |
+void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, \ |
|
571 |
+ ptrdiff_t stride) \ |
|
572 |
+{ \ |
|
573 |
+ uint8_t full[24 * 17]; \ |
|
574 |
+ uint8_t halfH[272]; \ |
|
575 |
+ uint8_t halfV[256]; \ |
|
576 |
+ uint8_t halfHV[256]; \ |
|
577 |
+ \ |
|
578 |
+ copy_block17(full, src, 24, stride, 17); \ |
|
579 |
+ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \ |
|
580 |
+ put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full + 1, 16, 24); \ |
|
581 |
+ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \ |
|
582 |
+ OPNAME ## pixels16_l4_8(dst, full + 25, halfH + 16, halfV, halfHV, \ |
|
583 |
+ stride, 24, 16, 16, 16, 16); \ |
|
584 |
+} \ |
|
585 |
+ \ |
|
586 |
+static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, \ |
|
587 |
+ ptrdiff_t stride) \ |
|
588 |
+{ \ |
|
589 |
+ uint8_t full[24 * 17]; \ |
|
590 |
+ uint8_t halfH[272]; \ |
|
591 |
+ uint8_t halfHV[256]; \ |
|
592 |
+ \ |
|
593 |
+ copy_block17(full, src, 24, stride, 17); \ |
|
594 |
+ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \ |
|
595 |
+ put ## RND ## pixels16_l2_8(halfH, halfH, full + 1, 16, 16, 24, 17); \ |
|
596 |
+ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \ |
|
597 |
+ OPNAME ## pixels16_l2_8(dst, halfH + 16, halfHV, stride, 16, 16, 16); \ |
|
598 |
+} \ |
|
599 |
+ \ |
|
600 |
+static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, \ |
|
601 |
+ ptrdiff_t stride) \ |
|
602 |
+{ \ |
|
603 |
+ uint8_t halfH[272]; \ |
|
604 |
+ uint8_t halfHV[256]; \ |
|
605 |
+ \ |
|
606 |
+ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17); \ |
|
607 |
+ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \ |
|
608 |
+ OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16); \ |
|
609 |
+} \ |
|
610 |
+ \ |
|
611 |
+static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, \ |
|
612 |
+ ptrdiff_t stride) \ |
|
613 |
+{ \ |
|
614 |
+ uint8_t halfH[272]; \ |
|
615 |
+ uint8_t halfHV[256]; \ |
|
616 |
+ \ |
|
617 |
+ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17); \ |
|
618 |
+ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \ |
|
619 |
+ OPNAME ## pixels16_l2_8(dst, halfH + 16, halfHV, stride, 16, 16, 16); \ |
|
620 |
+} \ |
|
621 |
+ \ |
|
622 |
+void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, \ |
|
623 |
+ ptrdiff_t stride) \ |
|
624 |
+{ \ |
|
625 |
+ uint8_t full[24 * 17]; \ |
|
626 |
+ uint8_t halfH[272]; \ |
|
627 |
+ uint8_t halfV[256]; \ |
|
628 |
+ uint8_t halfHV[256]; \ |
|
629 |
+ \ |
|
630 |
+ copy_block17(full, src, 24, stride, 17); \ |
|
631 |
+ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \ |
|
632 |
+ put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24); \ |
|
633 |
+ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \ |
|
634 |
+ OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16); \ |
|
635 |
+} \ |
|
636 |
+ \ |
|
637 |
+static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, \ |
|
638 |
+ ptrdiff_t stride) \ |
|
639 |
+{ \ |
|
640 |
+ uint8_t full[24 * 17]; \ |
|
641 |
+ uint8_t halfH[272]; \ |
|
642 |
+ \ |
|
643 |
+ copy_block17(full, src, 24, stride, 17); \ |
|
644 |
+ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \ |
|
645 |
+ put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17); \ |
|
646 |
+ OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16); \ |
|
647 |
+} \ |
|
648 |
+ \ |
|
649 |
+void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, \ |
|
650 |
+ ptrdiff_t stride) \ |
|
651 |
+{ \ |
|
652 |
+ uint8_t full[24 * 17]; \ |
|
653 |
+ uint8_t halfH[272]; \ |
|
654 |
+ uint8_t halfV[256]; \ |
|
655 |
+ uint8_t halfHV[256]; \ |
|
656 |
+ \ |
|
657 |
+ copy_block17(full, src, 24, stride, 17); \ |
|
658 |
+ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \ |
|
659 |
+ put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full + 1, 16, 24); \ |
|
660 |
+ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \ |
|
661 |
+ OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16); \ |
|
662 |
+} \ |
|
663 |
+ \ |
|
664 |
+static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, \ |
|
665 |
+ ptrdiff_t stride) \ |
|
666 |
+{ \ |
|
667 |
+ uint8_t full[24 * 17]; \ |
|
668 |
+ uint8_t halfH[272]; \ |
|
669 |
+ \ |
|
670 |
+ copy_block17(full, src, 24, stride, 17); \ |
|
671 |
+ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \ |
|
672 |
+ put ## RND ## pixels16_l2_8(halfH, halfH, full + 1, 16, 16, 24, 17); \ |
|
673 |
+ OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16); \ |
|
674 |
+} \ |
|
675 |
+ \ |
|
676 |
+static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, \ |
|
677 |
+ ptrdiff_t stride) \ |
|
678 |
+{ \ |
|
679 |
+ uint8_t halfH[272]; \ |
|
680 |
+ \ |
|
681 |
+ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17); \ |
|
682 |
+ OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16); \ |
|
683 |
+} |
|
684 |
+ |
|
685 |
/* Store operators handed to QPEL_MC as OP. Each one adds a bias to the
 * filter sum b, shifts it right by 5 and looks the result up in the table
 * named cm in the calling function: op_put and op_avg use a bias of 16,
 * op_put_no_rnd a bias of 15. op_avg additionally averages the looked-up
 * value with the current destination value a, adding 1 before halving. */
+#define op_avg(a, b) a = (((a) + cm[((b) + 16) >> 5] + 1) >> 1) |
|
686 |
+#define op_put(a, b) a = cm[((b) + 16) >> 5] |
|
687 |
+#define op_put_no_rnd(a, b) a = cm[((b) + 15) >> 5] |
|
688 |
+ |
|
689 |
/* Generate the put_, put_no_rnd_ and avg_ families of qpel functions. */
+QPEL_MC(0, put_, _, op_put) |
|
690 |
+QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd) |
|
691 |
+QPEL_MC(0, avg_, _, op_avg) |
|
692 |
+ |
|
693 |
/* The operators are only needed for the instantiations above. */
+#undef op_avg |
|
694 |
+#undef op_put |
|
695 |
+#undef op_put_no_rnd |
|
696 |
+ |
|
697 |
/* Exported 8x8 wrapper: forwards dst, src and stride to put_pixels8_8_c
 * with a fixed height of 8 rows. */
+void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride) |
|
698 |
+{ |
|
699 |
+ put_pixels8_8_c(dst, src, stride, 8); |
|
700 |
+} |
|
701 |
+ |
|
702 |
/* Exported 8x8 wrapper: forwards dst, src and stride to avg_pixels8_8_c
 * with a fixed height of 8 rows. */
+void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride) |
|
703 |
+{ |
|
704 |
+ avg_pixels8_8_c(dst, src, stride, 8); |
|
705 |
+} |
|
706 |
+ |
|
707 |
/* Exported 16x16 wrapper: forwards dst, src and stride to put_pixels16_8_c
 * with a fixed height of 16 rows. */
+void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride) |
|
708 |
+{ |
|
709 |
+ put_pixels16_8_c(dst, src, stride, 16); |
|
710 |
+} |
|
711 |
+ |
|
712 |
/* Exported 16x16 wrapper: forwards dst, src and stride to avg_pixels16_8_c
 * with a fixed height of 16 rows. */
+void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride) |
|
713 |
+{ |
|
714 |
+ avg_pixels16_8_c(dst, src, stride, 16); |
|
715 |
+} |
|
716 |
+ |
|
717 |
/* The mc00 table entries have no dedicated qpel function: they alias the
 * exported whole-block wrappers. Both put_no_rnd aliases map to the same
 * put wrappers as the plain put aliases. */
+#define put_qpel8_mc00_c ff_put_pixels8x8_c |
|
718 |
+#define avg_qpel8_mc00_c ff_avg_pixels8x8_c |
|
719 |
+#define put_qpel16_mc00_c ff_put_pixels16x16_c |
|
720 |
+#define avg_qpel16_mc00_c ff_avg_pixels16x16_c |
|
721 |
+#define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c |
|
722 |
+#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c |
|
723 |
+ |
|
724 |
/* Exported entry point for put_pixels8_l2_8: passes every argument through
 * unchanged, in the same order. */
+void ff_put_pixels8_l2_8(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, |
|
725 |
+ int dst_stride, int src_stride1, int src_stride2, |
|
726 |
+ int h) |
|
727 |
+{ |
|
728 |
+ put_pixels8_l2_8(dst, src1, src2, dst_stride, src_stride1, src_stride2, h); |
|
729 |
+ |
|
730 |
+} |
|
731 |
+ |
|
732 |
/* Fill the put_qpel, put_no_rnd_qpel and avg_qpel function tables of c
 * with the C implementations from this file, then let the x86 init
 * routine run when ARCH_X86 is nonzero. */
+av_cold void ff_qpeldsp_init(QpelDSPContext *c) |
|
733 |
+{ |
|
734 |
/* dspfunc(PFX, IDX, NUM) fills row IDX of c->PFX_pixels_tab with the 16
 * functions PFX NUM _mcXY_c. Entry X + 4 * Y holds mcXY (index 1 is mc10,
 * index 4 is mc01, index 15 is mc33). The macro is not undefined
 * afterwards. */
+#define dspfunc(PFX, IDX, NUM) \ |
|
735 |
+ c->PFX ## _pixels_tab[IDX][0] = PFX ## NUM ## _mc00_c; \ |
|
736 |
+ c->PFX ## _pixels_tab[IDX][1] = PFX ## NUM ## _mc10_c; \ |
|
737 |
+ c->PFX ## _pixels_tab[IDX][2] = PFX ## NUM ## _mc20_c; \ |
|
738 |
+ c->PFX ## _pixels_tab[IDX][3] = PFX ## NUM ## _mc30_c; \ |
|
739 |
+ c->PFX ## _pixels_tab[IDX][4] = PFX ## NUM ## _mc01_c; \ |
|
740 |
+ c->PFX ## _pixels_tab[IDX][5] = PFX ## NUM ## _mc11_c; \ |
|
741 |
+ c->PFX ## _pixels_tab[IDX][6] = PFX ## NUM ## _mc21_c; \ |
|
742 |
+ c->PFX ## _pixels_tab[IDX][7] = PFX ## NUM ## _mc31_c; \ |
|
743 |
+ c->PFX ## _pixels_tab[IDX][8] = PFX ## NUM ## _mc02_c; \ |
|
744 |
+ c->PFX ## _pixels_tab[IDX][9] = PFX ## NUM ## _mc12_c; \ |
|
745 |
+ c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \ |
|
746 |
+ c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \ |
|
747 |
+ c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \ |
|
748 |
+ c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \ |
|
749 |
+ c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \ |
|
750 |
+ c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c |
|
751 |
+ |
|
752 |
/* Row 0 of each table takes the 16-pixel functions, row 1 the 8-pixel ones. */
+ dspfunc(put_qpel, 0, 16); |
|
753 |
+ dspfunc(put_qpel, 1, 8); |
|
754 |
+ |
|
755 |
+ dspfunc(put_no_rnd_qpel, 0, 16); |
|
756 |
+ dspfunc(put_no_rnd_qpel, 1, 8); |
|
757 |
+ |
|
758 |
+ dspfunc(avg_qpel, 0, 16); |
|
759 |
+ dspfunc(avg_qpel, 1, 8); |
|
760 |
+ |
|
761 |
/* Runs after the C defaults are installed. NOTE(review): presumably it
 * replaces some entries with optimized versions; confirm in the x86 code. */
+ if (ARCH_X86) |
|
762 |
+ ff_qpeldsp_init_x86(c); |
|
763 |
+} |
0 | 764 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,78 @@ |
0 |
+/* |
|
1 |
+ * quarterpel DSP functions |
|
2 |
+ * |
|
3 |
+ * This file is part of Libav. |
|
4 |
+ * |
|
5 |
+ * Libav is free software; you can redistribute it and/or |
|
6 |
+ * modify it under the terms of the GNU Lesser General Public |
|
7 |
+ * License as published by the Free Software Foundation; either |
|
8 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
9 |
+ * |
|
10 |
+ * Libav is distributed in the hope that it will be useful, |
|
11 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
12 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
13 |
+ * Lesser General Public License for more details. |
|
14 |
+ * |
|
15 |
+ * You should have received a copy of the GNU Lesser General Public |
|
16 |
+ * License along with Libav; if not, write to the Free Software |
|
17 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
18 |
+ */ |
|
19 |
+ |
|
20 |
+/** |
|
21 |
+ * @file |
|
22 |
+ * quarterpel DSP functions |
|
23 |
+ */ |
|
24 |
+ |
|
25 |
+#ifndef AVCODEC_QPELDSP_H |
|
26 |
+#define AVCODEC_QPELDSP_H |
|
27 |
+ |
|
28 |
+#include <stddef.h> |
|
29 |
+#include <stdint.h> |
|
30 |
+ |
|
31 |
+void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride); |
|
32 |
+void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride); |
|
33 |
+void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride); |
|
34 |
+void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride); |
|
35 |
+ |
|
36 |
+void ff_put_pixels8_l2_8(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, |
|
37 |
+ int dst_stride, int src_stride1, int src_stride2, |
|
38 |
+ int h); |
|
39 |
+ |
|
40 |
+#define DEF_OLD_QPEL(name) \ |
|
41 |
+ void ff_put_ ## name(uint8_t *dst /* align width (8 or 16) */, \ |
|
42 |
+ uint8_t *src /* align 1 */, ptrdiff_t stride); \ |
|
43 |
+ void ff_put_no_rnd_ ## name(uint8_t *dst /* align width (8 or 16) */, \ |
|
44 |
+ uint8_t *src /* align 1 */, ptrdiff_t stride); \ |
|
45 |
+ void ff_avg_ ## name(uint8_t *dst /* align width (8 or 16) */, \ |
|
46 |
+ uint8_t *src /* align 1 */, ptrdiff_t stride); |
|
47 |
+ |
|
48 |
+DEF_OLD_QPEL(qpel16_mc11_old_c) |
|
49 |
+DEF_OLD_QPEL(qpel16_mc31_old_c) |
|
50 |
+DEF_OLD_QPEL(qpel16_mc12_old_c) |
|
51 |
+DEF_OLD_QPEL(qpel16_mc32_old_c) |
|
52 |
+DEF_OLD_QPEL(qpel16_mc13_old_c) |
|
53 |
+DEF_OLD_QPEL(qpel16_mc33_old_c) |
|
54 |
+DEF_OLD_QPEL(qpel8_mc11_old_c) |
|
55 |
+DEF_OLD_QPEL(qpel8_mc31_old_c) |
|
56 |
+DEF_OLD_QPEL(qpel8_mc12_old_c) |
|
57 |
+DEF_OLD_QPEL(qpel8_mc32_old_c) |
|
58 |
+DEF_OLD_QPEL(qpel8_mc13_old_c) |
|
59 |
+DEF_OLD_QPEL(qpel8_mc33_old_c) |
|
60 |
+ |
|
61 |
+typedef void (*qpel_mc_func)(uint8_t *dst /* align width (8 or 16) */, |
|
62 |
+ uint8_t *src /* align 1 */, ptrdiff_t stride); |
|
63 |
+ |
|
64 |
+/** |
|
65 |
+ * quarterpel DSP context |
|
66 |
+ */ |
|
67 |
+typedef struct QpelDSPContext { |
|
68 |
+ qpel_mc_func put_qpel_pixels_tab[2][16]; |
|
69 |
+ qpel_mc_func avg_qpel_pixels_tab[2][16]; |
|
70 |
+ qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16]; |
|
71 |
+} QpelDSPContext; |
|
72 |
+ |
|
73 |
+void ff_qpeldsp_init(QpelDSPContext *c); |
|
74 |
+ |
|
75 |
+void ff_qpeldsp_init_x86(QpelDSPContext *c); |
|
76 |
+ |
|
77 |
+#endif /* AVCODEC_QPELDSP_H */ |
... | ... |
@@ -27,8 +27,8 @@ |
27 | 27 |
#ifndef AVCODEC_RV34DSP_H |
28 | 28 |
#define AVCODEC_RV34DSP_H |
29 | 29 |
|
30 |
-#include "dsputil.h" |
|
31 | 30 |
#include "h264chroma.h" |
31 |
+#include "qpeldsp.h" |
|
32 | 32 |
|
33 | 33 |
typedef void (*rv40_weight_func)(uint8_t *dst/*align width (8 or 16)*/, |
34 | 34 |
uint8_t *src1/*align width (8 or 16)*/, |
... | ... |
@@ -33,6 +33,7 @@ |
33 | 33 |
#include "mpegvideo.h" |
34 | 34 |
#include "h263.h" |
35 | 35 |
#include "h264chroma.h" |
36 |
+#include "qpeldsp.h" |
|
36 | 37 |
#include "vc1.h" |
37 | 38 |
#include "vc1data.h" |
38 | 39 |
#include "vc1acdata.h" |
... | ... |
@@ -5603,6 +5604,7 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx) |
5603 | 5603 |
if (ff_vc1_init_common(v) < 0) |
5604 | 5604 |
return -1; |
5605 | 5605 |
ff_h264chroma_init(&v->h264chroma, 8); |
5606 |
+ ff_qpeldsp_init(&s->qdsp); |
|
5606 | 5607 |
ff_vc1dsp_init(&v->vc1dsp); |
5607 | 5608 |
|
5608 | 5609 |
if (avctx->codec_id == AV_CODEC_ID_WMV3 || avctx->codec_id == AV_CODEC_ID_WMV3IMAGE) { |
... | ... |
@@ -5971,8 +5973,8 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data, |
5971 | 5971 |
s->current_picture_ptr->f->repeat_pict = v->rptfrm * 2; |
5972 | 5972 |
} |
5973 | 5973 |
|
5974 |
- s->me.qpel_put = s->dsp.put_qpel_pixels_tab; |
|
5975 |
- s->me.qpel_avg = s->dsp.avg_qpel_pixels_tab; |
|
5974 |
+ s->me.qpel_put = s->qdsp.put_qpel_pixels_tab; |
|
5975 |
+ s->me.qpel_avg = s->qdsp.avg_qpel_pixels_tab; |
|
5976 | 5976 |
|
5977 | 5977 |
if (avctx->hwaccel) { |
5978 | 5978 |
if (avctx->hwaccel->start_frame(avctx, buf, buf_size) < 0) |
... | ... |
@@ -20,6 +20,7 @@ OBJS-$(CONFIG_LPC) += x86/lpc.o |
20 | 20 |
OBJS-$(CONFIG_MPEGAUDIODSP) += x86/mpegaudiodsp.o |
21 | 21 |
OBJS-$(CONFIG_MPEGVIDEO) += x86/mpegvideo.o |
22 | 22 |
OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoenc.o |
23 |
+OBJS-$(CONFIG_QPELDSP) += x86/qpeldsp_init.o |
|
23 | 24 |
OBJS-$(CONFIG_VIDEODSP) += x86/videodsp_init.o |
24 | 25 |
OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp_init.o |
25 | 26 |
OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o |
... | ... |
@@ -44,13 +45,13 @@ OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp_init.o |
44 | 44 |
OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o |
45 | 45 |
|
46 | 46 |
MMX-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_mmx.o \ |
47 |
- x86/fpel_mmx.o \ |
|
48 | 47 |
x86/idct_mmx_xvid.o \ |
49 | 48 |
x86/idct_sse2_xvid.o \ |
50 | 49 |
x86/simple_idct.o |
51 | 50 |
MMX-OBJS-$(CONFIG_HPELDSP) += x86/fpel_mmx.o \ |
52 | 51 |
x86/hpeldsp_mmx.o |
53 | 52 |
MMX-OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp_mmx.o |
53 |
+MMX-OBJS-$(CONFIG_QPELDSP) += x86/fpel_mmx.o |
|
54 | 54 |
|
55 | 55 |
MMX-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc_mmx.o |
56 | 56 |
MMX-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_mmx.o |
... | ... |
@@ -61,10 +62,7 @@ YASM-OBJS += x86/deinterlace.o \ |
61 | 61 |
YASM-OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp.o |
62 | 62 |
YASM-OBJS-$(CONFIG_DCT) += x86/dct32.o |
63 | 63 |
YASM-OBJS-$(CONFIG_DNXHD_ENCODER) += x86/dnxhdenc.o |
64 |
-YASM-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil.o \ |
|
65 |
- x86/fpel.o \ |
|
66 |
- x86/mpeg4qpel.o \ |
|
67 |
- x86/qpel.o |
|
64 |
+YASM-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil.o |
|
68 | 65 |
YASM-OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc.o |
69 | 66 |
YASM-OBJS-$(CONFIG_FFT) += x86/fft.o |
70 | 67 |
YASM-OBJS-$(CONFIG_H263DSP) += x86/h263_loopfilter.o |
... | ... |
@@ -86,6 +84,9 @@ YASM-OBJS-$(CONFIG_HPELDSP) += x86/fpel.o \ |
86 | 86 |
x86/hpeldsp.o |
87 | 87 |
YASM-OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp.o |
88 | 88 |
YASM-OBJS-$(CONFIG_MPEGAUDIODSP) += x86/imdct36.o |
89 |
+YASM-OBJS-$(CONFIG_QPELDSP) += x86/qpeldsp.o \ |
|
90 |
+ x86/fpel.o \ |
|
91 |
+ x86/qpel.o |
|
89 | 92 |
YASM-OBJS-$(CONFIG_VIDEODSP) += x86/videodsp.o |
90 | 93 |
YASM-OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp.o |
91 | 94 |
|
... | ... |
@@ -23,55 +23,11 @@ |
23 | 23 |
#include "libavutil/x86/cpu.h" |
24 | 24 |
#include "libavcodec/avcodec.h" |
25 | 25 |
#include "libavcodec/dsputil.h" |
26 |
-#include "libavcodec/pixels.h" |
|
27 | 26 |
#include "libavcodec/simple_idct.h" |
28 | 27 |
#include "libavcodec/version.h" |
29 | 28 |
#include "dsputil_x86.h" |
30 |
-#include "fpel.h" |
|
31 | 29 |
#include "idct_xvid.h" |
32 | 30 |
|
33 |
-void ff_put_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, |
|
34 |
- int dstStride, int src1Stride, int h); |
|
35 |
-void ff_put_no_rnd_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, |
|
36 |
- uint8_t *src2, int dstStride, |
|
37 |
- int src1Stride, int h); |
|
38 |
-void ff_avg_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, |
|
39 |
- int dstStride, int src1Stride, int h); |
|
40 |
-void ff_put_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, |
|
41 |
- int dstStride, int src1Stride, int h); |
|
42 |
-void ff_avg_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, |
|
43 |
- int dstStride, int src1Stride, int h); |
|
44 |
-void ff_put_no_rnd_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, |
|
45 |
- int dstStride, int src1Stride, int h); |
|
46 |
-void ff_put_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, uint8_t *src, |
|
47 |
- int dstStride, int srcStride, int h); |
|
48 |
-void ff_avg_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, uint8_t *src, |
|
49 |
- int dstStride, int srcStride, int h); |
|
50 |
-void ff_put_no_rnd_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, uint8_t *src, |
|
51 |
- int dstStride, int srcStride, |
|
52 |
- int h); |
|
53 |
-void ff_put_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, uint8_t *src, |
|
54 |
- int dstStride, int srcStride, int h); |
|
55 |
-void ff_avg_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, uint8_t *src, |
|
56 |
- int dstStride, int srcStride, int h); |
|
57 |
-void ff_put_no_rnd_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, uint8_t *src, |
|
58 |
- int dstStride, int srcStride, |
|
59 |
- int h); |
|
60 |
-void ff_put_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, uint8_t *src, |
|
61 |
- int dstStride, int srcStride); |
|
62 |
-void ff_avg_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, uint8_t *src, |
|
63 |
- int dstStride, int srcStride); |
|
64 |
-void ff_put_no_rnd_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, uint8_t *src, |
|
65 |
- int dstStride, int srcStride); |
|
66 |
-void ff_put_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, uint8_t *src, |
|
67 |
- int dstStride, int srcStride); |
|
68 |
-void ff_avg_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, uint8_t *src, |
|
69 |
- int dstStride, int srcStride); |
|
70 |
-void ff_put_no_rnd_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, uint8_t *src, |
|
71 |
- int dstStride, int srcStride); |
|
72 |
-#define ff_put_no_rnd_pixels16_mmxext ff_put_pixels16_mmxext |
|
73 |
-#define ff_put_no_rnd_pixels8_mmxext ff_put_pixels8_mmxext |
|
74 |
- |
|
75 | 31 |
int32_t ff_scalarproduct_int16_mmxext(const int16_t *v1, const int16_t *v2, |
76 | 32 |
int order); |
77 | 33 |
int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2, |
... | ... |
@@ -89,418 +45,6 @@ void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src, |
89 | 89 |
void ff_vector_clip_int32_sse4(int32_t *dst, const int32_t *src, |
90 | 90 |
int32_t min, int32_t max, unsigned int len); |
91 | 91 |
|
92 |
-#if HAVE_YASM |
|
93 |
- |
|
94 |
-CALL_2X_PIXELS(ff_avg_pixels16_mmxext, ff_avg_pixels8_mmxext, 8) |
|
95 |
-CALL_2X_PIXELS(ff_put_pixels16_mmxext, ff_put_pixels8_mmxext, 8) |
|
96 |
- |
|
97 |
-#define QPEL_OP(OPNAME, RND, MMX) \ |
|
98 |
-static void OPNAME ## qpel8_mc00_ ## MMX(uint8_t *dst, uint8_t *src, \ |
|
99 |
- ptrdiff_t stride) \ |
|
100 |
-{ \ |
|
101 |
- ff_ ## OPNAME ## pixels8_ ## MMX(dst, src, stride, 8); \ |
|
102 |
-} \ |
|
103 |
- \ |
|
104 |
-static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, uint8_t *src, \ |
|
105 |
- ptrdiff_t stride) \ |
|
106 |
-{ \ |
|
107 |
- uint64_t temp[8]; \ |
|
108 |
- uint8_t *const half = (uint8_t *) temp; \ |
|
109 |
- ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \ |
|
110 |
- stride, 8); \ |
|
111 |
- ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half, \ |
|
112 |
- stride, stride, 8); \ |
|
113 |
-} \ |
|
114 |
- \ |
|
115 |
-static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, uint8_t *src, \ |
|
116 |
- ptrdiff_t stride) \ |
|
117 |
-{ \ |
|
118 |
- ff_ ## OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride, \ |
|
119 |
- stride, 8); \ |
|
120 |
-} \ |
|
121 |
- \ |
|
122 |
-static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, uint8_t *src, \ |
|
123 |
- ptrdiff_t stride) \ |
|
124 |
-{ \ |
|
125 |
- uint64_t temp[8]; \ |
|
126 |
- uint8_t *const half = (uint8_t *) temp; \ |
|
127 |
- ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \ |
|
128 |
- stride, 8); \ |
|
129 |
- ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + 1, half, stride, \ |
|
130 |
- stride, 8); \ |
|
131 |
-} \ |
|
132 |
- \ |
|
133 |
-static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, uint8_t *src, \ |
|
134 |
- ptrdiff_t stride) \ |
|
135 |
-{ \ |
|
136 |
- uint64_t temp[8]; \ |
|
137 |
- uint8_t *const half = (uint8_t *) temp; \ |
|
138 |
- ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \ |
|
139 |
- 8, stride); \ |
|
140 |
- ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half, \ |
|
141 |
- stride, stride, 8); \ |
|
142 |
-} \ |
|
143 |
- \ |
|
144 |
-static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, uint8_t *src, \ |
|
145 |
- ptrdiff_t stride) \ |
|
146 |
-{ \ |
|
147 |
- ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src, \ |
|
148 |
- stride, stride); \ |
|
149 |
-} \ |
|
150 |
- \ |
|
151 |
-static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, uint8_t *src, \ |
|
152 |
- ptrdiff_t stride) \ |
|
153 |
-{ \ |
|
154 |
- uint64_t temp[8]; \ |
|
155 |
- uint8_t *const half = (uint8_t *) temp; \ |
|
156 |
- ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \ |
|
157 |
- 8, stride); \ |
|
158 |
- ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + stride, half, stride,\ |
|
159 |
- stride, 8); \ |
|
160 |
-} \ |
|
161 |
- \ |
|
162 |
-static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, uint8_t *src, \ |
|
163 |
- ptrdiff_t stride) \ |
|
164 |
-{ \ |
|
165 |
- uint64_t half[8 + 9]; \ |
|
166 |
- uint8_t *const halfH = (uint8_t *) half + 64; \ |
|
167 |
- uint8_t *const halfHV = (uint8_t *) half; \ |
|
168 |
- ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ |
|
169 |
- stride, 9); \ |
|
170 |
- ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, \ |
|
171 |
- stride, 9); \ |
|
172 |
- ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ |
|
173 |
- ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \ |
|
174 |
- stride, 8, 8); \ |
|
175 |
-} \ |
|
176 |
- \ |
|
177 |
-static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, uint8_t *src, \ |
|
178 |
- ptrdiff_t stride) \ |
|
179 |
-{ \ |
|
180 |
- uint64_t half[8 + 9]; \ |
|
181 |
- uint8_t *const halfH = (uint8_t *) half + 64; \ |
|
182 |
- uint8_t *const halfHV = (uint8_t *) half; \ |
|
183 |
- ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ |
|
184 |
- stride, 9); \ |
|
185 |
- ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \ |
|
186 |
- stride, 9); \ |
|
187 |
- ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ |
|
188 |
- ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \ |
|
189 |
- stride, 8, 8); \ |
|
190 |
-} \ |
|
191 |
- \ |
|
192 |
-static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, uint8_t *src, \ |
|
193 |
- ptrdiff_t stride) \ |
|
194 |
-{ \ |
|
195 |
- uint64_t half[8 + 9]; \ |
|
196 |
- uint8_t *const halfH = (uint8_t *) half + 64; \ |
|
197 |
- uint8_t *const halfHV = (uint8_t *) half; \ |
|
198 |
- ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ |
|
199 |
- stride, 9); \ |
|
200 |
- ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, \ |
|
201 |
- stride, 9); \ |
|
202 |
- ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ |
|
203 |
- ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \ |
|
204 |
- stride, 8, 8); \ |
|
205 |
-} \ |
|
206 |
- \ |
|
207 |
-static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, uint8_t *src, \ |
|
208 |
- ptrdiff_t stride) \ |
|
209 |
-{ \ |
|
210 |
- uint64_t half[8 + 9]; \ |
|
211 |
- uint8_t *const halfH = (uint8_t *) half + 64; \ |
|
212 |
- uint8_t *const halfHV = (uint8_t *) half; \ |
|
213 |
- ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ |
|
214 |
- stride, 9); \ |
|
215 |
- ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \ |
|
216 |
- stride, 9); \ |
|
217 |
- ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ |
|
218 |
- ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \ |
|
219 |
- stride, 8, 8); \ |
|
220 |
-} \ |
|
221 |
- \ |
|
222 |
-static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, uint8_t *src, \ |
|
223 |
- ptrdiff_t stride) \ |
|
224 |
-{ \ |
|
225 |
- uint64_t half[8 + 9]; \ |
|
226 |
- uint8_t *const halfH = (uint8_t *) half + 64; \ |
|
227 |
- uint8_t *const halfHV = (uint8_t *) half; \ |
|
228 |
- ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ |
|
229 |
- stride, 9); \ |
|
230 |
- ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ |
|
231 |
- ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \ |
|
232 |
- stride, 8, 8); \ |
|
233 |
-} \ |
|
234 |
- \ |
|
235 |
-static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, uint8_t *src, \ |
|
236 |
- ptrdiff_t stride) \ |
|
237 |
-{ \ |
|
238 |
- uint64_t half[8 + 9]; \ |
|
239 |
- uint8_t *const halfH = (uint8_t *) half + 64; \ |
|
240 |
- uint8_t *const halfHV = (uint8_t *) half; \ |
|
241 |
- ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ |
|
242 |
- stride, 9); \ |
|
243 |
- ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ |
|
244 |
- ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \ |
|
245 |
- stride, 8, 8); \ |
|
246 |
-} \ |
|
247 |
- \ |
|
248 |
-static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, uint8_t *src, \ |
|
249 |
- ptrdiff_t stride) \ |
|
250 |
-{ \ |
|
251 |
- uint64_t half[8 + 9]; \ |
|
252 |
- uint8_t *const halfH = (uint8_t *) half; \ |
|
253 |
- ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ |
|
254 |
- stride, 9); \ |
|
255 |
- ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, \ |
|
256 |
- 8, stride, 9); \ |
|
257 |
- ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \ |
|
258 |
- stride, 8); \ |
|
259 |
-} \ |
|
260 |
- \ |
|
261 |
-static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, uint8_t *src, \ |
|
262 |
- ptrdiff_t stride) \ |
|
263 |
-{ \ |
|
264 |
- uint64_t half[8 + 9]; \ |
|
265 |
- uint8_t *const halfH = (uint8_t *) half; \ |
|
266 |
- ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ |
|
267 |
- stride, 9); \ |
|
268 |
- ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \ |
|
269 |
- stride, 9); \ |
|
270 |
- ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \ |
|
271 |
- stride, 8); \ |
|
272 |
-} \ |
|
273 |
- \ |
|
274 |
-static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, uint8_t *src, \ |
|
275 |
- ptrdiff_t stride) \ |
|
276 |
-{ \ |
|
277 |
- uint64_t half[9]; \ |
|
278 |
- uint8_t *const halfH = (uint8_t *) half; \ |
|
279 |
- ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ |
|
280 |
- stride, 9); \ |
|
281 |
- ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \ |
|
282 |
- stride, 8); \ |
|
283 |
-} \ |
|
284 |
- \ |
|
285 |
-static void OPNAME ## qpel16_mc00_ ## MMX(uint8_t *dst, uint8_t *src, \ |
|
286 |
- ptrdiff_t stride) \ |
|
287 |
-{ \ |
|
288 |
- ff_ ## OPNAME ## pixels16_ ## MMX(dst, src, stride, 16); \ |
|
289 |
-} \ |
|
290 |
- \ |
|
291 |
-static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, uint8_t *src, \ |
|
292 |
- ptrdiff_t stride) \ |
|
293 |
-{ \ |
|
294 |
- uint64_t temp[32]; \ |
|
295 |
- uint8_t *const half = (uint8_t *) temp; \ |
|
296 |
- ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \ |
|
297 |
- stride, 16); \ |
|
298 |
- ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, \ |
|
299 |
- stride, 16); \ |
|
300 |
-} \ |
|
301 |
- \ |
|
302 |
-static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, uint8_t *src, \ |
|
303 |
- ptrdiff_t stride) \ |
|
304 |
-{ \ |
|
305 |
- ff_ ## OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src, \ |
|
306 |
- stride, stride, 16);\ |
|
307 |
-} \ |
|
308 |
- \ |
|
309 |
-static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, uint8_t *src, \ |
|
310 |
- ptrdiff_t stride) \ |
|
311 |
-{ \ |
|
312 |
- uint64_t temp[32]; \ |
|
313 |
- uint8_t *const half = (uint8_t*) temp; \ |
|
314 |
- ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \ |
|
315 |
- stride, 16); \ |
|
316 |
- ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src + 1, half, \ |
|
317 |
- stride, stride, 16); \ |
|
318 |
-} \ |
|
319 |
- \ |
|
320 |
-static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, uint8_t *src, \ |
|
321 |
- ptrdiff_t stride) \ |
|
322 |
-{ \ |
|
323 |
- uint64_t temp[32]; \ |
|
324 |
- uint8_t *const half = (uint8_t *) temp; \ |
|
325 |
- ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \ |
|
326 |
- stride); \ |
|
327 |
- ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, \ |
|
328 |
- stride, 16); \ |
|
329 |
-} \ |
|
330 |
- \ |
|
331 |
-static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, uint8_t *src, \ |
|
332 |
- ptrdiff_t stride) \ |
|
333 |
-{ \ |
|
334 |
- ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src, \ |
|
335 |
- stride, stride); \ |
|
336 |
-} \ |
|
337 |
- \ |
|
338 |
-static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, uint8_t *src, \ |
|
339 |
- ptrdiff_t stride) \ |
|
340 |
-{ \ |
|
341 |
- uint64_t temp[32]; \ |
|
342 |
- uint8_t *const half = (uint8_t *) temp; \ |
|
343 |
- ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \ |
|
344 |
- stride); \ |
|
345 |
- ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src+stride, half, \ |
|
346 |
- stride, stride, 16); \ |
|
347 |
-} \ |
|
348 |
- \ |
|
349 |
-static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, uint8_t *src, \ |
|
350 |
- ptrdiff_t stride) \ |
|
351 |
-{ \ |
|
352 |
- uint64_t half[16 * 2 + 17 * 2]; \ |
|
353 |
- uint8_t *const halfH = (uint8_t *) half + 256; \ |
|
354 |
- uint8_t *const halfHV = (uint8_t *) half; \ |
|
355 |
- ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ |
|
356 |
- stride, 17); \ |
|
357 |
- ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \ |
|
358 |
- stride, 17); \ |
|
359 |
- ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \ |
|
360 |
- 16, 16); \ |
|
361 |
- ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \ |
|
362 |
- stride, 16, 16); \ |
|
363 |
-} \ |
|
364 |
- \ |
|
365 |
-static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, uint8_t *src, \ |
|
366 |
- ptrdiff_t stride) \ |
|
367 |
-{ \ |
|
368 |
- uint64_t half[16 * 2 + 17 * 2]; \ |
|
369 |
- uint8_t *const halfH = (uint8_t *) half + 256; \ |
|
370 |
- uint8_t *const halfHV = (uint8_t *) half; \ |
|
371 |
- ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ |
|
372 |
- stride, 17); \ |
|
373 |
- ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \ |
|
374 |
- stride, 17); \ |
|
375 |
- ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \ |
|
376 |
- 16, 16); \ |
|
377 |
- ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \ |
|
378 |
- stride, 16, 16); \ |
|
379 |
-} \ |
|
380 |
- \ |
|
381 |
-static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, uint8_t *src, \ |
|
382 |
- ptrdiff_t stride) \ |
|
383 |
-{ \ |
|
384 |
- uint64_t half[16 * 2 + 17 * 2]; \ |
|
385 |
- uint8_t *const halfH = (uint8_t *) half + 256; \ |
|
386 |
- uint8_t *const halfHV = (uint8_t *) half; \ |
|
387 |
- ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ |
|
388 |
- stride, 17); \ |
|
389 |
- ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \ |
|
390 |
- stride, 17); \ |
|
391 |
- ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \ |
|
392 |
- 16, 16); \ |
|
393 |
- ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \ |
|
394 |
- stride, 16, 16); \ |
|
395 |
-} \ |
|
396 |
- \ |
|
397 |
-static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, uint8_t *src, \ |
|
398 |
- ptrdiff_t stride) \ |
|
399 |
-{ \ |
|
400 |
- uint64_t half[16 * 2 + 17 * 2]; \ |
|
401 |
- uint8_t *const halfH = (uint8_t *) half + 256; \ |
|
402 |
- uint8_t *const halfHV = (uint8_t *) half; \ |
|
403 |
- ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ |
|
404 |
- stride, 17); \ |
|
405 |
- ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \ |
|
406 |
- stride, 17); \ |
|
407 |
- ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \ |
|
408 |
- 16, 16); \ |
|
409 |
- ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \ |
|
410 |
- stride, 16, 16); \ |
|
411 |
-} \ |
|
412 |
- \ |
|
413 |
-static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, uint8_t *src, \ |
|
414 |
- ptrdiff_t stride) \ |
|
415 |
-{ \ |
|
416 |
- uint64_t half[16 * 2 + 17 * 2]; \ |
|
417 |
- uint8_t *const halfH = (uint8_t *) half + 256; \ |
|
418 |
- uint8_t *const halfHV = (uint8_t *) half; \ |
|
419 |
- ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ |
|
420 |
- stride, 17); \ |
|
421 |
- ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \ |
|
422 |
- 16, 16); \ |
|
423 |
- ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \ |
|
424 |
- stride, 16, 16); \ |
|
425 |
-} \ |
|
426 |
- \ |
|
427 |
-static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, uint8_t *src, \ |
|
428 |
- ptrdiff_t stride) \ |
|
429 |
-{ \ |
|
430 |
- uint64_t half[16 * 2 + 17 * 2]; \ |
|
431 |
- uint8_t *const halfH = (uint8_t *) half + 256; \ |
|
432 |
- uint8_t *const halfHV = (uint8_t *) half; \ |
|
433 |
- ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ |
|
434 |
- stride, 17); \ |
|
435 |
- ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \ |
|
436 |
- 16, 16); \ |
|
437 |
- ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \ |
|
438 |
- stride, 16, 16); \ |
|
439 |
-} \ |
|
440 |
- \ |
|
441 |
-static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, uint8_t *src, \ |
|
442 |
- ptrdiff_t stride) \ |
|
443 |
-{ \ |
|
444 |
- uint64_t half[17 * 2]; \ |
|
445 |
- uint8_t *const halfH = (uint8_t *) half; \ |
|
446 |
- ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ |
|
447 |
- stride, 17); \ |
|
448 |
- ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \ |
|
449 |
- stride, 17); \ |
|
450 |
- ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \ |
|
451 |
- stride, 16); \ |
|
452 |
-} \ |
|
453 |
- \ |
|
454 |
-static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, uint8_t *src, \ |
|
455 |
- ptrdiff_t stride) \ |
|
456 |
-{ \ |
|
457 |
- uint64_t half[17 * 2]; \ |
|
458 |
- uint8_t *const halfH = (uint8_t *) half; \ |
|
459 |
- ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ |
|
460 |
- stride, 17); \ |
|
461 |
- ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \ |
|
462 |
- stride, 17); \ |
|
463 |
- ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \ |
|
464 |
- stride, 16); \ |
|
465 |
-} \ |
|
466 |
- \ |
|
467 |
-static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, uint8_t *src, \ |
|
468 |
- ptrdiff_t stride) \ |
|
469 |
-{ \ |
|
470 |
- uint64_t half[17 * 2]; \ |
|
471 |
- uint8_t *const halfH = (uint8_t *) half; \ |
|
472 |
- ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ |
|
473 |
- stride, 17); \ |
|
474 |
- ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \ |
|
475 |
- stride, 16); \ |
|
476 |
-} |
|
477 |
- |
|
478 |
-QPEL_OP(put_, _, mmxext) |
|
479 |
-QPEL_OP(avg_, _, mmxext) |
|
480 |
-QPEL_OP(put_no_rnd_, _no_rnd_, mmxext) |
|
481 |
- |
|
482 |
-#endif /* HAVE_YASM */ |
|
483 |
- |
|
484 |
-#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) \ |
|
485 |
-do { \ |
|
486 |
- c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \ |
|
487 |
- c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \ |
|
488 |
- c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \ |
|
489 |
- c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \ |
|
490 |
- c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \ |
|
491 |
- c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \ |
|
492 |
- c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \ |
|
493 |
- c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \ |
|
494 |
- c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \ |
|
495 |
- c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \ |
|
496 |
- c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \ |
|
497 |
- c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \ |
|
498 |
- c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \ |
|
499 |
- c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \ |
|
500 |
- c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \ |
|
501 |
- c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU; \ |
|
502 |
-} while (0) |
|
503 |
- |
|
504 | 92 |
static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, |
505 | 93 |
int cpu_flags, unsigned high_bit_depth) |
506 | 94 |
{ |
... | ... |
@@ -550,14 +94,6 @@ static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx, |
550 | 550 |
#endif /* HAVE_MMXEXT_INLINE */ |
551 | 551 |
|
552 | 552 |
#if HAVE_MMXEXT_EXTERNAL |
553 |
- SET_QPEL_FUNCS(avg_qpel, 0, 16, mmxext, ); |
|
554 |
- SET_QPEL_FUNCS(avg_qpel, 1, 8, mmxext, ); |
|
555 |
- |
|
556 |
- SET_QPEL_FUNCS(put_qpel, 0, 16, mmxext, ); |
|
557 |
- SET_QPEL_FUNCS(put_qpel, 1, 8, mmxext, ); |
|
558 |
- SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmxext, ); |
|
559 |
- SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmxext, ); |
|
560 |
- |
|
561 | 553 |
c->scalarproduct_int16 = ff_scalarproduct_int16_mmxext; |
562 | 554 |
#endif /* HAVE_MMXEXT_EXTERNAL */ |
563 | 555 |
} |
564 | 556 |
deleted file mode 100644 |
... | ... |
@@ -1,558 +0,0 @@ |
1 |
-;****************************************************************************** |
|
2 |
-;* mpeg4 qpel |
|
3 |
-;* Copyright (c) 2008 Loren Merritt |
|
4 |
-;* |
|
5 |
-;* This file is part of Libav. |
|
6 |
-;* |
|
7 |
-;* Libav is free software; you can redistribute it and/or |
|
8 |
-;* modify it under the terms of the GNU Lesser General Public |
|
9 |
-;* License as published by the Free Software Foundation; either |
|
10 |
-;* version 2.1 of the License, or (at your option) any later version. |
|
11 |
-;* |
|
12 |
-;* Libav is distributed in the hope that it will be useful, |
|
13 |
-;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
14 |
-;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
15 |
-;* Lesser General Public License for more details. |
|
16 |
-;* |
|
17 |
-;* You should have received a copy of the GNU Lesser General Public |
|
18 |
-;* License along with Libav; if not, write to the Free Software |
|
19 |
-;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
20 |
-;****************************************************************************** |
|
21 |
- |
|
22 |
-%include "libavutil/x86/x86util.asm" |
|
23 |
- |
|
24 |
-SECTION_RODATA |
|
25 |
-cextern pb_1 |
|
26 |
-cextern pw_3 |
|
27 |
-cextern pw_15 |
|
28 |
-cextern pw_16 |
|
29 |
-cextern pw_20 |
|
30 |
- |
|
31 |
- |
|
32 |
-SECTION_TEXT |
|
33 |
- |
|
34 |
-; void ff_put_no_rnd_pixels8_l2(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) |
|
35 |
-%macro PUT_NO_RND_PIXELS8_L2 0 |
|
36 |
-cglobal put_no_rnd_pixels8_l2, 6,6 |
|
37 |
- movsxdifnidn r4, r4d |
|
38 |
- movsxdifnidn r3, r3d |
|
39 |
- pcmpeqb m6, m6 |
|
40 |
- test r5d, 1 |
|
41 |
- je .loop |
|
42 |
- mova m0, [r1] |
|
43 |
- mova m1, [r2] |
|
44 |
- add r1, r4 |
|
45 |
- add r2, 8 |
|
46 |
- pxor m0, m6 |
|
47 |
- pxor m1, m6 |
|
48 |
- PAVGB m0, m1 |
|
49 |
- pxor m0, m6 |
|
50 |
- mova [r0], m0 |
|
51 |
- add r0, r3 |
|
52 |
- dec r5d |
|
53 |
-.loop: |
|
54 |
- mova m0, [r1] |
|
55 |
- add r1, r4 |
|
56 |
- mova m1, [r1] |
|
57 |
- add r1, r4 |
|
58 |
- mova m2, [r2] |
|
59 |
- mova m3, [r2+8] |
|
60 |
- pxor m0, m6 |
|
61 |
- pxor m1, m6 |
|
62 |
- pxor m2, m6 |
|
63 |
- pxor m3, m6 |
|
64 |
- PAVGB m0, m2 |
|
65 |
- PAVGB m1, m3 |
|
66 |
- pxor m0, m6 |
|
67 |
- pxor m1, m6 |
|
68 |
- mova [r0], m0 |
|
69 |
- add r0, r3 |
|
70 |
- mova [r0], m1 |
|
71 |
- add r0, r3 |
|
72 |
- mova m0, [r1] |
|
73 |
- add r1, r4 |
|
74 |
- mova m1, [r1] |
|
75 |
- add r1, r4 |
|
76 |
- mova m2, [r2+16] |
|
77 |
- mova m3, [r2+24] |
|
78 |
- pxor m0, m6 |
|
79 |
- pxor m1, m6 |
|
80 |
- pxor m2, m6 |
|
81 |
- pxor m3, m6 |
|
82 |
- PAVGB m0, m2 |
|
83 |
- PAVGB m1, m3 |
|
84 |
- pxor m0, m6 |
|
85 |
- pxor m1, m6 |
|
86 |
- mova [r0], m0 |
|
87 |
- add r0, r3 |
|
88 |
- mova [r0], m1 |
|
89 |
- add r0, r3 |
|
90 |
- add r2, 32 |
|
91 |
- sub r5d, 4 |
|
92 |
- jne .loop |
|
93 |
- REP_RET |
|
94 |
-%endmacro |
|
95 |
- |
|
96 |
-INIT_MMX mmxext |
|
97 |
-PUT_NO_RND_PIXELS8_L2 |
|
98 |
- |
|
99 |
- |
|
100 |
-; void ff_put_no_rnd_pixels16_l2(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) |
|
101 |
-%macro PUT_NO_RND_PIXELS16_l2 0 |
|
102 |
-cglobal put_no_rnd_pixels16_l2, 6,6 |
|
103 |
- movsxdifnidn r3, r3d |
|
104 |
- movsxdifnidn r4, r4d |
|
105 |
- pcmpeqb m6, m6 |
|
106 |
- test r5d, 1 |
|
107 |
- je .loop |
|
108 |
- mova m0, [r1] |
|
109 |
- mova m1, [r1+8] |
|
110 |
- mova m2, [r2] |
|
111 |
- mova m3, [r2+8] |
|
112 |
- pxor m0, m6 |
|
113 |
- pxor m1, m6 |
|
114 |
- pxor m2, m6 |
|
115 |
- pxor m3, m6 |
|
116 |
- PAVGB m0, m2 |
|
117 |
- PAVGB m1, m3 |
|
118 |
- pxor m0, m6 |
|
119 |
- pxor m1, m6 |
|
120 |
- add r1, r4 |
|
121 |
- add r2, 16 |
|
122 |
- mova [r0], m0 |
|
123 |
- mova [r0+8], m1 |
|
124 |
- add r0, r3 |
|
125 |
- dec r5d |
|
126 |
-.loop: |
|
127 |
- mova m0, [r1] |
|
128 |
- mova m1, [r1+8] |
|
129 |
- add r1, r4 |
|
130 |
- mova m2, [r2] |
|
131 |
- mova m3, [r2+8] |
|
132 |
- pxor m0, m6 |
|
133 |
- pxor m1, m6 |
|
134 |
- pxor m2, m6 |
|
135 |
- pxor m3, m6 |
|
136 |
- PAVGB m0, m2 |
|
137 |
- PAVGB m1, m3 |
|
138 |
- pxor m0, m6 |
|
139 |
- pxor m1, m6 |
|
140 |
- mova [r0], m0 |
|
141 |
- mova [r0+8], m1 |
|
142 |
- add r0, r3 |
|
143 |
- mova m0, [r1] |
|
144 |
- mova m1, [r1+8] |
|
145 |
- add r1, r4 |
|
146 |
- mova m2, [r2+16] |
|
147 |
- mova m3, [r2+24] |
|
148 |
- pxor m0, m6 |
|
149 |
- pxor m1, m6 |
|
150 |
- pxor m2, m6 |
|
151 |
- pxor m3, m6 |
|
152 |
- PAVGB m0, m2 |
|
153 |
- PAVGB m1, m3 |
|
154 |
- pxor m0, m6 |
|
155 |
- pxor m1, m6 |
|
156 |
- mova [r0], m0 |
|
157 |
- mova [r0+8], m1 |
|
158 |
- add r0, r3 |
|
159 |
- add r2, 32 |
|
160 |
- sub r5d, 2 |
|
161 |
- jne .loop |
|
162 |
- REP_RET |
|
163 |
-%endmacro |
|
164 |
- |
|
165 |
-INIT_MMX mmxext |
|
166 |
-PUT_NO_RND_PIXELS16_l2 |
|
167 |
-INIT_MMX 3dnow |
|
168 |
-PUT_NO_RND_PIXELS16_l2 |
|
169 |
- |
|
170 |
-%macro MPEG4_QPEL16_H_LOWPASS 1 |
|
171 |
-cglobal %1_mpeg4_qpel16_h_lowpass, 5, 5, 0, 16 |
|
172 |
- movsxdifnidn r2, r2d |
|
173 |
- movsxdifnidn r3, r3d |
|
174 |
- pxor m7, m7 |
|
175 |
-.loop: |
|
176 |
- mova m0, [r1] |
|
177 |
- mova m1, m0 |
|
178 |
- mova m2, m0 |
|
179 |
- punpcklbw m0, m7 |
|
180 |
- punpckhbw m1, m7 |
|
181 |
- pshufw m5, m0, 0x90 |
|
182 |
- pshufw m6, m0, 0x41 |
|
183 |
- mova m3, m2 |
|
184 |
- mova m4, m2 |
|
185 |
- psllq m2, 8 |
|
186 |
- psllq m3, 16 |
|
187 |
- psllq m4, 24 |
|
188 |
- punpckhbw m2, m7 |
|
189 |
- punpckhbw m3, m7 |
|
190 |
- punpckhbw m4, m7 |
|
191 |
- paddw m5, m3 |
|
192 |
- paddw m6, m2 |
|
193 |
- paddw m5, m5 |
|
194 |
- psubw m6, m5 |
|
195 |
- pshufw m5, m0, 6 |
|
196 |
- pmullw m6, [pw_3] |
|
197 |
- paddw m0, m4 |
|
198 |
- paddw m5, m1 |
|
199 |
- pmullw m0, [pw_20] |
|
200 |
- psubw m0, m5 |
|
201 |
- paddw m6, [PW_ROUND] |
|
202 |
- paddw m0, m6 |
|
203 |
- psraw m0, 5 |
|
204 |
- mova [rsp+8], m0 |
|
205 |
- mova m0, [r1+5] |
|
206 |
- mova m5, m0 |
|
207 |
- mova m6, m0 |
|
208 |
- psrlq m0, 8 |
|
209 |
- psrlq m5, 16 |
|
210 |
- punpcklbw m0, m7 |
|
211 |
- punpcklbw m5, m7 |
|
212 |
- paddw m2, m0 |
|
213 |
- paddw m3, m5 |
|
214 |
- paddw m2, m2 |
|
215 |
- psubw m3, m2 |
|
216 |
- mova m2, m6 |
|
217 |
- psrlq m6, 24 |
|
218 |
- punpcklbw m2, m7 |
|
219 |
- punpcklbw m6, m7 |
|
220 |
- pmullw m3, [pw_3] |
|
221 |
- paddw m1, m2 |
|
222 |
- paddw m4, m6 |
|
223 |
- pmullw m1, [pw_20] |
|
224 |
- psubw m3, m4 |
|
225 |
- paddw m1, [PW_ROUND] |
|
226 |
- paddw m3, m1 |
|
227 |
- psraw m3, 5 |
|
228 |
- mova m1, [rsp+8] |
|
229 |
- packuswb m1, m3 |
|
230 |
- OP_MOV [r0], m1, m4 |
|
231 |
- mova m1, [r1+9] |
|
232 |
- mova m4, m1 |
|
233 |
- mova m3, m1 |
|
234 |
- psrlq m1, 8 |
|
235 |
- psrlq m4, 16 |
|
236 |
- punpcklbw m1, m7 |
|
237 |
- punpcklbw m4, m7 |
|
238 |
- paddw m5, m1 |
|
239 |
- paddw m0, m4 |
|
240 |
- paddw m5, m5 |
|
241 |
- psubw m0, m5 |
|
242 |
- mova m5, m3 |
|
243 |
- psrlq m3, 24 |
|
244 |
- pmullw m0, [pw_3] |
|
245 |
- punpcklbw m3, m7 |
|
246 |
- paddw m2, m3 |
|
247 |
- psubw m0, m2 |
|
248 |
- mova m2, m5 |
|
249 |
- punpcklbw m2, m7 |
|
250 |
- punpckhbw m5, m7 |
|
251 |
- paddw m6, m2 |
|
252 |
- pmullw m6, [pw_20] |
|
253 |
- paddw m0, [PW_ROUND] |
|
254 |
- paddw m0, m6 |
|
255 |
- psraw m0, 5 |
|
256 |
- paddw m3, m5 |
|
257 |
- pshufw m6, m5, 0xf9 |
|
258 |
- paddw m6, m4 |
|
259 |
- pshufw m4, m5, 0xbe |
|
260 |
- pshufw m5, m5, 0x6f |
|
261 |
- paddw m4, m1 |
|
262 |
- paddw m5, m2 |
|
263 |
- paddw m6, m6 |
|
264 |
- psubw m4, m6 |
|
265 |
- pmullw m3, [pw_20] |
|
266 |
- pmullw m4, [pw_3] |
|
267 |
- psubw m3, m5 |
|
268 |
- paddw m4, [PW_ROUND] |
|
269 |
- paddw m4, m3 |
|
270 |
- psraw m4, 5 |
|
271 |
- packuswb m0, m4 |
|
272 |
- OP_MOV [r0+8], m0, m4 |
|
273 |
- add r1, r3 |
|
274 |
- add r0, r2 |
|
275 |
- dec r4d |
|
276 |
- jne .loop |
|
277 |
- REP_RET |
|
278 |
-%endmacro |
|
279 |
- |
|
280 |
-%macro PUT_OP 2-3 |
|
281 |
- mova %1, %2 |
|
282 |
-%endmacro |
|
283 |
- |
|
284 |
-%macro AVG_OP 2-3 |
|
285 |
- mova %3, %1 |
|
286 |
- pavgb %2, %3 |
|
287 |
- mova %1, %2 |
|
288 |
-%endmacro |
|
289 |
- |
|
290 |
-INIT_MMX mmxext |
|
291 |
-%define PW_ROUND pw_16 |
|
292 |
-%define OP_MOV PUT_OP |
|
293 |
-MPEG4_QPEL16_H_LOWPASS put |
|
294 |
-%define PW_ROUND pw_16 |
|
295 |
-%define OP_MOV AVG_OP |
|
296 |
-MPEG4_QPEL16_H_LOWPASS avg |
|
297 |
-%define PW_ROUND pw_15 |
|
298 |
-%define OP_MOV PUT_OP |
|
299 |
-MPEG4_QPEL16_H_LOWPASS put_no_rnd |
|
300 |
- |
|
301 |
- |
|
302 |
- |
|
303 |
-%macro MPEG4_QPEL8_H_LOWPASS 1 |
|
304 |
-cglobal %1_mpeg4_qpel8_h_lowpass, 5, 5, 0, 8 |
|
305 |
- movsxdifnidn r2, r2d |
|
306 |
- movsxdifnidn r3, r3d |
|
307 |
- pxor m7, m7 |
|
308 |
-.loop: |
|
309 |
- mova m0, [r1] |
|
310 |
- mova m1, m0 |
|
311 |
- mova m2, m0 |
|
312 |
- punpcklbw m0, m7 |
|
313 |
- punpckhbw m1, m7 |
|
314 |
- pshufw m5, m0, 0x90 |
|
315 |
- pshufw m6, m0, 0x41 |
|
316 |
- mova m3, m2 |
|
317 |
- mova m4, m2 |
|
318 |
- psllq m2, 8 |
|
319 |
- psllq m3, 16 |
|
320 |
- psllq m4, 24 |
|
321 |
- punpckhbw m2, m7 |
|
322 |
- punpckhbw m3, m7 |
|
323 |
- punpckhbw m4, m7 |
|
324 |
- paddw m5, m3 |
|
325 |
- paddw m6, m2 |
|
326 |
- paddw m5, m5 |
|
327 |
- psubw m6, m5 |
|
328 |
- pshufw m5, m0, 0x6 |
|
329 |
- pmullw m6, [pw_3] |
|
330 |
- paddw m0, m4 |
|
331 |
- paddw m5, m1 |
|
332 |
- pmullw m0, [pw_20] |
|
333 |
- psubw m0, m5 |
|
334 |
- paddw m6, [PW_ROUND] |
|
335 |
- paddw m0, m6 |
|
336 |
- psraw m0, 5 |
|
337 |
- movh m5, [r1+5] |
|
338 |
- punpcklbw m5, m7 |
|
339 |
- pshufw m6, m5, 0xf9 |
|
340 |
- paddw m1, m5 |
|
341 |
- paddw m2, m6 |
|
342 |
- pshufw m6, m5, 0xbe |
|
343 |
- pshufw m5, m5, 0x6f |
|
344 |
- paddw m3, m6 |
|
345 |
- paddw m4, m5 |
|
346 |
- paddw m2, m2 |
|
347 |
- psubw m3, m2 |
|
348 |
- pmullw m1, [pw_20] |
|
349 |
- pmullw m3, [pw_3] |
|
350 |
- psubw m3, m4 |
|
351 |
- paddw m1, [PW_ROUND] |
|
352 |
- paddw m3, m1 |
|
353 |
- psraw m3, 5 |
|
354 |
- packuswb m0, m3 |
|
355 |
- OP_MOV [r0], m0, m4 |
|
356 |
- add r1, r3 |
|
357 |
- add r0, r2 |
|
358 |
- dec r4d |
|
359 |
- jne .loop |
|
360 |
- REP_RET |
|
361 |
-%endmacro |
|
362 |
- |
|
363 |
-INIT_MMX mmxext |
|
364 |
-%define PW_ROUND pw_16 |
|
365 |
-%define OP_MOV PUT_OP |
|
366 |
-MPEG4_QPEL8_H_LOWPASS put |
|
367 |
-%define PW_ROUND pw_16 |
|
368 |
-%define OP_MOV AVG_OP |
|
369 |
-MPEG4_QPEL8_H_LOWPASS avg |
|
370 |
-%define PW_ROUND pw_15 |
|
371 |
-%define OP_MOV PUT_OP |
|
372 |
-MPEG4_QPEL8_H_LOWPASS put_no_rnd |
|
373 |
- |
|
374 |
- |
|
375 |
- |
|
376 |
-%macro QPEL_V_LOW 5 |
|
377 |
- paddw m0, m1 |
|
378 |
- mova m4, [pw_20] |
|
379 |
- pmullw m4, m0 |
|
380 |
- mova m0, %4 |
|
381 |
- mova m5, %1 |
|
382 |
- paddw m5, m0 |
|
383 |
- psubw m4, m5 |
|
384 |
- mova m5, %2 |
|
385 |
- mova m6, %3 |
|
386 |
- paddw m5, m3 |
|
387 |
- paddw m6, m2 |
|
388 |
- paddw m6, m6 |
|
389 |
- psubw m5, m6 |
|
390 |
- pmullw m5, [pw_3] |
|
391 |
- paddw m4, [PW_ROUND] |
|
392 |
- paddw m5, m4 |
|
393 |
- psraw m5, 5 |
|
394 |
- packuswb m5, m5 |
|
395 |
- OP_MOV %5, m5, m7 |
|
396 |
- SWAP 0,1,2,3 |
|
397 |
-%endmacro |
|
398 |
- |
|
399 |
-%macro MPEG4_QPEL16_V_LOWPASS 1 |
|
400 |
-cglobal %1_mpeg4_qpel16_v_lowpass, 4, 6, 0, 544 |
|
401 |
- movsxdifnidn r2, r2d |
|
402 |
- movsxdifnidn r3, r3d |
|
403 |
- |
|
404 |
- mov r4d, 17 |
|
405 |
- mov r5, rsp |
|
406 |
- pxor m7, m7 |
|
407 |
-.looph: |
|
408 |
- mova m0, [r1] |
|
409 |
- mova m1, [r1] |
|
410 |
- mova m2, [r1+8] |
|
411 |
- mova m3, [r1+8] |
|
412 |
- punpcklbw m0, m7 |
|
413 |
- punpckhbw m1, m7 |
|
414 |
- punpcklbw m2, m7 |
|
415 |
- punpckhbw m3, m7 |
|
416 |
- mova [r5], m0 |
|
417 |
- mova [r5+0x88], m1 |
|
418 |
- mova [r5+0x110], m2 |
|
419 |
- mova [r5+0x198], m3 |
|
420 |
- add r5, 8 |
|
421 |
- add r1, r3 |
|
422 |
- dec r4d |
|
423 |
- jne .looph |
|
424 |
- |
|
425 |
- |
|
426 |
- ; NOTE: r1 CHANGES VALUES: r1 -> 4 - 14*dstStride |
|
427 |
- mov r4d, 4 |
|
428 |
- mov r1, 4 |
|
429 |
- neg r2 |
|
430 |
- lea r1, [r1+r2*8] |
|
431 |
- lea r1, [r1+r2*4] |
|
432 |
- lea r1, [r1+r2*2] |
|
433 |
- neg r2 |
|
434 |
- mov r5, rsp |
|
435 |
-.loopv: |
|
436 |
- pxor m7, m7 |
|
437 |
- mova m0, [r5+ 0x0] |
|
438 |
- mova m1, [r5+ 0x8] |
|
439 |
- mova m2, [r5+0x10] |
|
440 |
- mova m3, [r5+0x18] |
|
441 |
- QPEL_V_LOW [r5+0x10], [r5+ 0x8], [r5+ 0x0], [r5+0x20], [r0] |
|
442 |
- QPEL_V_LOW [r5+ 0x8], [r5+ 0x0], [r5+ 0x0], [r5+0x28], [r0+r2] |
|
443 |
- lea r0, [r0+r2*2] |
|
444 |
- QPEL_V_LOW [r5+ 0x0], [r5+ 0x0], [r5+ 0x8], [r5+0x30], [r0] |
|
445 |
- QPEL_V_LOW [r5+ 0x0], [r5+ 0x8], [r5+0x10], [r5+0x38], [r0+r2] |
|
446 |
- lea r0, [r0+r2*2] |
|
447 |
- QPEL_V_LOW [r5+ 0x8], [r5+0x10], [r5+0x18], [r5+0x40], [r0] |
|
448 |
- QPEL_V_LOW [r5+0x10], [r5+0x18], [r5+0x20], [r5+0x48], [r0+r2] |
|
449 |
- lea r0, [r0+r2*2] |
|
450 |
- QPEL_V_LOW [r5+0x18], [r5+0x20], [r5+0x28], [r5+0x50], [r0] |
|
451 |
- QPEL_V_LOW [r5+0x20], [r5+0x28], [r5+0x30], [r5+0x58], [r0+r2] |
|
452 |
- lea r0, [r0+r2*2] |
|
453 |
- QPEL_V_LOW [r5+0x28], [r5+0x30], [r5+0x38], [r5+0x60], [r0] |
|
454 |
- QPEL_V_LOW [r5+0x30], [r5+0x38], [r5+0x40], [r5+0x68], [r0+r2] |
|
455 |
- lea r0, [r0+r2*2] |
|
456 |
- QPEL_V_LOW [r5+0x38], [r5+0x40], [r5+0x48], [r5+0x70], [r0] |
|
457 |
- QPEL_V_LOW [r5+0x40], [r5+0x48], [r5+0x50], [r5+0x78], [r0+r2] |
|
458 |
- lea r0, [r0+r2*2] |
|
459 |
- QPEL_V_LOW [r5+0x48], [r5+0x50], [r5+0x58], [r5+0x80], [r0] |
|
460 |
- QPEL_V_LOW [r5+0x50], [r5+0x58], [r5+0x60], [r5+0x80], [r0+r2] |
|
461 |
- lea r0, [r0+r2*2] |
|
462 |
- QPEL_V_LOW [r5+0x58], [r5+0x60], [r5+0x68], [r5+0x78], [r0] |
|
463 |
- QPEL_V_LOW [r5+0x60], [r5+0x68], [r5+0x70], [r5+0x70], [r0+r2] |
|
464 |
- |
|
465 |
- add r5, 0x88 |
|
466 |
- add r0, r1 |
|
467 |
- dec r4d |
|
468 |
- jne .loopv |
|
469 |
- REP_RET |
|
470 |
-%endmacro |
|
471 |
- |
|
472 |
-%macro PUT_OPH 2-3 |
|
473 |
- movh %1, %2 |
|
474 |
-%endmacro |
|
475 |
- |
|
476 |
-%macro AVG_OPH 2-3 |
|
477 |
- movh %3, %1 |
|
478 |
- pavgb %2, %3 |
|
479 |
- movh %1, %2 |
|
480 |
-%endmacro |
|
481 |
- |
|
482 |
-INIT_MMX mmxext |
|
483 |
-%define PW_ROUND pw_16 |
|
484 |
-%define OP_MOV PUT_OPH |
|
485 |
-MPEG4_QPEL16_V_LOWPASS put |
|
486 |
-%define PW_ROUND pw_16 |
|
487 |
-%define OP_MOV AVG_OPH |
|
488 |
-MPEG4_QPEL16_V_LOWPASS avg |
|
489 |
-%define PW_ROUND pw_15 |
|
490 |
-%define OP_MOV PUT_OPH |
|
491 |
-MPEG4_QPEL16_V_LOWPASS put_no_rnd |
|
492 |
- |
|
493 |
- |
|
494 |
- |
|
495 |
-%macro MPEG4_QPEL8_V_LOWPASS 1 |
|
496 |
-cglobal %1_mpeg4_qpel8_v_lowpass, 4, 6, 0, 288 |
|
497 |
- movsxdifnidn r2, r2d |
|
498 |
- movsxdifnidn r3, r3d |
|
499 |
- |
|
500 |
- mov r4d, 9 |
|
501 |
- mov r5, rsp |
|
502 |
- pxor m7, m7 |
|
503 |
-.looph: |
|
504 |
- mova m0, [r1] |
|
505 |
- mova m1, [r1] |
|
506 |
- punpcklbw m0, m7 |
|
507 |
- punpckhbw m1, m7 |
|
508 |
- mova [r5], m0 |
|
509 |
- mova [r5+0x48], m1 |
|
510 |
- add r5, 8 |
|
511 |
- add r1, r3 |
|
512 |
- dec r4d |
|
513 |
- jne .looph |
|
514 |
- |
|
515 |
- |
|
516 |
- ; NOTE: r1 CHANGES VALUES: r1 -> 4 - 6*dstStride |
|
517 |
- mov r4d, 2 |
|
518 |
- mov r1, 4 |
|
519 |
- neg r2 |
|
520 |
- lea r1, [r1+r2*4] |
|
521 |
- lea r1, [r1+r2*2] |
|
522 |
- neg r2 |
|
523 |
- mov r5, rsp |
|
524 |
-.loopv: |
|
525 |
- pxor m7, m7 |
|
526 |
- mova m0, [r5+ 0x0] |
|
527 |
- mova m1, [r5+ 0x8] |
|
528 |
- mova m2, [r5+0x10] |
|
529 |
- mova m3, [r5+0x18] |
|
530 |
- QPEL_V_LOW [r5+0x10], [r5+ 0x8], [r5+ 0x0], [r5+0x20], [r0] |
|
531 |
- QPEL_V_LOW [r5+ 0x8], [r5+ 0x0], [r5+ 0x0], [r5+0x28], [r0+r2] |
|
532 |
- lea r0, [r0+r2*2] |
|
533 |
- QPEL_V_LOW [r5+ 0x0], [r5+ 0x0], [r5+ 0x8], [r5+0x30], [r0] |
|
534 |
- QPEL_V_LOW [r5+ 0x0], [r5+ 0x8], [r5+0x10], [r5+0x38], [r0+r2] |
|
535 |
- lea r0, [r0+r2*2] |
|
536 |
- QPEL_V_LOW [r5+ 0x8], [r5+0x10], [r5+0x18], [r5+0x40], [r0] |
|
537 |
- QPEL_V_LOW [r5+0x10], [r5+0x18], [r5+0x20], [r5+0x40], [r0+r2] |
|
538 |
- lea r0, [r0+r2*2] |
|
539 |
- QPEL_V_LOW [r5+0x18], [r5+0x20], [r5+0x28], [r5+0x38], [r0] |
|
540 |
- QPEL_V_LOW [r5+0x20], [r5+0x28], [r5+0x30], [r5+0x30], [r0+r2] |
|
541 |
- |
|
542 |
- add r5, 0x48 |
|
543 |
- add r0, r1 |
|
544 |
- dec r4d |
|
545 |
- jne .loopv |
|
546 |
- REP_RET |
|
547 |
-%endmacro |
|
548 |
- |
|
549 |
-INIT_MMX mmxext |
|
550 |
-%define PW_ROUND pw_16 |
|
551 |
-%define OP_MOV PUT_OPH |
|
552 |
-MPEG4_QPEL8_V_LOWPASS put |
|
553 |
-%define PW_ROUND pw_16 |
|
554 |
-%define OP_MOV AVG_OPH |
|
555 |
-MPEG4_QPEL8_V_LOWPASS avg |
|
556 |
-%define PW_ROUND pw_15 |
|
557 |
-%define OP_MOV PUT_OPH |
|
558 |
-MPEG4_QPEL8_V_LOWPASS put_no_rnd |
559 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,559 @@ |
0 |
+;****************************************************************************** |
|
1 |
+;* quarterpel DSP functions |
|
2 |
+;* |
|
3 |
+;* Copyright (c) 2008 Loren Merritt |
|
4 |
+;* |
|
5 |
+;* This file is part of Libav. |
|
6 |
+;* |
|
7 |
+;* Libav is free software; you can redistribute it and/or |
|
8 |
+;* modify it under the terms of the GNU Lesser General Public |
|
9 |
+;* License as published by the Free Software Foundation; either |
|
10 |
+;* version 2.1 of the License, or (at your option) any later version. |
|
11 |
+;* |
|
12 |
+;* Libav is distributed in the hope that it will be useful, |
|
13 |
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
14 |
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
15 |
+;* Lesser General Public License for more details. |
|
16 |
+;* |
|
17 |
+;* You should have received a copy of the GNU Lesser General Public |
|
18 |
+;* License along with Libav; if not, write to the Free Software |
|
19 |
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
20 |
+;****************************************************************************** |
|
21 |
+ |
|
22 |
+%include "libavutil/x86/x86util.asm" |
|
23 |
+ |
|
24 |
+SECTION_RODATA |
|
25 |
+cextern pb_1 |
|
26 |
+cextern pw_3 |
|
27 |
+cextern pw_15 |
|
28 |
+cextern pw_16 |
|
29 |
+cextern pw_20 |
|
30 |
+ |
|
31 |
+ |
|
32 |
+SECTION_TEXT |
|
33 |
+ |
|
34 |
+; void ff_put_no_rnd_pixels8_l2(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) |
|
35 |
+%macro PUT_NO_RND_PIXELS8_L2 0 |
|
36 |
+cglobal put_no_rnd_pixels8_l2, 6,6 |
|
37 |
+ movsxdifnidn r4, r4d |
|
38 |
+ movsxdifnidn r3, r3d |
|
39 |
+ pcmpeqb m6, m6 |
|
40 |
+ test r5d, 1 |
|
41 |
+ je .loop |
|
42 |
+ mova m0, [r1] |
|
43 |
+ mova m1, [r2] |
|
44 |
+ add r1, r4 |
|
45 |
+ add r2, 8 |
|
46 |
+ pxor m0, m6 |
|
47 |
+ pxor m1, m6 |
|
48 |
+ PAVGB m0, m1 |
|
49 |
+ pxor m0, m6 |
|
50 |
+ mova [r0], m0 |
|
51 |
+ add r0, r3 |
|
52 |
+ dec r5d |
|
53 |
+.loop: |
|
54 |
+ mova m0, [r1] |
|
55 |
+ add r1, r4 |
|
56 |
+ mova m1, [r1] |
|
57 |
+ add r1, r4 |
|
58 |
+ mova m2, [r2] |
|
59 |
+ mova m3, [r2+8] |
|
60 |
+ pxor m0, m6 |
|
61 |
+ pxor m1, m6 |
|
62 |
+ pxor m2, m6 |
|
63 |
+ pxor m3, m6 |
|
64 |
+ PAVGB m0, m2 |
|
65 |
+ PAVGB m1, m3 |
|
66 |
+ pxor m0, m6 |
|
67 |
+ pxor m1, m6 |
|
68 |
+ mova [r0], m0 |
|
69 |
+ add r0, r3 |
|
70 |
+ mova [r0], m1 |
|
71 |
+ add r0, r3 |
|
72 |
+ mova m0, [r1] |
|
73 |
+ add r1, r4 |
|
74 |
+ mova m1, [r1] |
|
75 |
+ add r1, r4 |
|
76 |
+ mova m2, [r2+16] |
|
77 |
+ mova m3, [r2+24] |
|
78 |
+ pxor m0, m6 |
|
79 |
+ pxor m1, m6 |
|
80 |
+ pxor m2, m6 |
|
81 |
+ pxor m3, m6 |
|
82 |
+ PAVGB m0, m2 |
|
83 |
+ PAVGB m1, m3 |
|
84 |
+ pxor m0, m6 |
|
85 |
+ pxor m1, m6 |
|
86 |
+ mova [r0], m0 |
|
87 |
+ add r0, r3 |
|
88 |
+ mova [r0], m1 |
|
89 |
+ add r0, r3 |
|
90 |
+ add r2, 32 |
|
91 |
+ sub r5d, 4 |
|
92 |
+ jne .loop |
|
93 |
+ REP_RET |
|
94 |
+%endmacro |
|
95 |
+ |
|
96 |
+INIT_MMX mmxext |
|
97 |
+PUT_NO_RND_PIXELS8_L2 |
|
98 |
+ |
|
99 |
+ |
|
100 |
+; void ff_put_no_rnd_pixels16_l2(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) |
|
101 |
+%macro PUT_NO_RND_PIXELS16_l2 0 |
|
102 |
+cglobal put_no_rnd_pixels16_l2, 6,6 |
|
103 |
+ movsxdifnidn r3, r3d |
|
104 |
+ movsxdifnidn r4, r4d |
|
105 |
+ pcmpeqb m6, m6 |
|
106 |
+ test r5d, 1 |
|
107 |
+ je .loop |
|
108 |
+ mova m0, [r1] |
|
109 |
+ mova m1, [r1+8] |
|
110 |
+ mova m2, [r2] |
|
111 |
+ mova m3, [r2+8] |
|
112 |
+ pxor m0, m6 |
|
113 |
+ pxor m1, m6 |
|
114 |
+ pxor m2, m6 |
|
115 |
+ pxor m3, m6 |
|
116 |
+ PAVGB m0, m2 |
|
117 |
+ PAVGB m1, m3 |
|
118 |
+ pxor m0, m6 |
|
119 |
+ pxor m1, m6 |
|
120 |
+ add r1, r4 |
|
121 |
+ add r2, 16 |
|
122 |
+ mova [r0], m0 |
|
123 |
+ mova [r0+8], m1 |
|
124 |
+ add r0, r3 |
|
125 |
+ dec r5d |
|
126 |
+.loop: |
|
127 |
+ mova m0, [r1] |
|
128 |
+ mova m1, [r1+8] |
|
129 |
+ add r1, r4 |
|
130 |
+ mova m2, [r2] |
|
131 |
+ mova m3, [r2+8] |
|
132 |
+ pxor m0, m6 |
|
133 |
+ pxor m1, m6 |
|
134 |
+ pxor m2, m6 |
|
135 |
+ pxor m3, m6 |
|
136 |
+ PAVGB m0, m2 |
|
137 |
+ PAVGB m1, m3 |
|
138 |
+ pxor m0, m6 |
|
139 |
+ pxor m1, m6 |
|
140 |
+ mova [r0], m0 |
|
141 |
+ mova [r0+8], m1 |
|
142 |
+ add r0, r3 |
|
143 |
+ mova m0, [r1] |
|
144 |
+ mova m1, [r1+8] |
|
145 |
+ add r1, r4 |
|
146 |
+ mova m2, [r2+16] |
|
147 |
+ mova m3, [r2+24] |
|
148 |
+ pxor m0, m6 |
|
149 |
+ pxor m1, m6 |
|
150 |
+ pxor m2, m6 |
|
151 |
+ pxor m3, m6 |
|
152 |
+ PAVGB m0, m2 |
|
153 |
+ PAVGB m1, m3 |
|
154 |
+ pxor m0, m6 |
|
155 |
+ pxor m1, m6 |
|
156 |
+ mova [r0], m0 |
|
157 |
+ mova [r0+8], m1 |
|
158 |
+ add r0, r3 |
|
159 |
+ add r2, 32 |
|
160 |
+ sub r5d, 2 |
|
161 |
+ jne .loop |
|
162 |
+ REP_RET |
|
163 |
+%endmacro |
|
164 |
+ |
|
165 |
+INIT_MMX mmxext |
|
166 |
+PUT_NO_RND_PIXELS16_l2 |
|
167 |
+INIT_MMX 3dnow |
|
168 |
+PUT_NO_RND_PIXELS16_l2 |
|
169 |
+ |
|
170 |
+%macro MPEG4_QPEL16_H_LOWPASS 1 |
|
171 |
+cglobal %1_mpeg4_qpel16_h_lowpass, 5, 5, 0, 16 |
|
172 |
+ movsxdifnidn r2, r2d |
|
173 |
+ movsxdifnidn r3, r3d |
|
174 |
+ pxor m7, m7 |
|
175 |
+.loop: |
|
176 |
+ mova m0, [r1] |
|
177 |
+ mova m1, m0 |
|
178 |
+ mova m2, m0 |
|
179 |
+ punpcklbw m0, m7 |
|
180 |
+ punpckhbw m1, m7 |
|
181 |
+ pshufw m5, m0, 0x90 |
|
182 |
+ pshufw m6, m0, 0x41 |
|
183 |
+ mova m3, m2 |
|
184 |
+ mova m4, m2 |
|
185 |
+ psllq m2, 8 |
|
186 |
+ psllq m3, 16 |
|
187 |
+ psllq m4, 24 |
|
188 |
+ punpckhbw m2, m7 |
|
189 |
+ punpckhbw m3, m7 |
|
190 |
+ punpckhbw m4, m7 |
|
191 |
+ paddw m5, m3 |
|
192 |
+ paddw m6, m2 |
|
193 |
+ paddw m5, m5 |
|
194 |
+ psubw m6, m5 |
|
195 |
+ pshufw m5, m0, 6 |
|
196 |
+ pmullw m6, [pw_3] |
|
197 |
+ paddw m0, m4 |
|
198 |
+ paddw m5, m1 |
|
199 |
+ pmullw m0, [pw_20] |
|
200 |
+ psubw m0, m5 |
|
201 |
+ paddw m6, [PW_ROUND] |
|
202 |
+ paddw m0, m6 |
|
203 |
+ psraw m0, 5 |
|
204 |
+ mova [rsp+8], m0 |
|
205 |
+ mova m0, [r1+5] |
|
206 |
+ mova m5, m0 |
|
207 |
+ mova m6, m0 |
|
208 |
+ psrlq m0, 8 |
|
209 |
+ psrlq m5, 16 |
|
210 |
+ punpcklbw m0, m7 |
|
211 |
+ punpcklbw m5, m7 |
|
212 |
+ paddw m2, m0 |
|
213 |
+ paddw m3, m5 |
|
214 |
+ paddw m2, m2 |
|
215 |
+ psubw m3, m2 |
|
216 |
+ mova m2, m6 |
|
217 |
+ psrlq m6, 24 |
|
218 |
+ punpcklbw m2, m7 |
|
219 |
+ punpcklbw m6, m7 |
|
220 |
+ pmullw m3, [pw_3] |
|
221 |
+ paddw m1, m2 |
|
222 |
+ paddw m4, m6 |
|
223 |
+ pmullw m1, [pw_20] |
|
224 |
+ psubw m3, m4 |
|
225 |
+ paddw m1, [PW_ROUND] |
|
226 |
+ paddw m3, m1 |
|
227 |
+ psraw m3, 5 |
|
228 |
+ mova m1, [rsp+8] |
|
229 |
+ packuswb m1, m3 |
|
230 |
+ OP_MOV [r0], m1, m4 |
|
231 |
+ mova m1, [r1+9] |
|
232 |
+ mova m4, m1 |
|
233 |
+ mova m3, m1 |
|
234 |
+ psrlq m1, 8 |
|
235 |
+ psrlq m4, 16 |
|
236 |
+ punpcklbw m1, m7 |
|
237 |
+ punpcklbw m4, m7 |
|
238 |
+ paddw m5, m1 |
|
239 |
+ paddw m0, m4 |
|
240 |
+ paddw m5, m5 |
|
241 |
+ psubw m0, m5 |
|
242 |
+ mova m5, m3 |
|
243 |
+ psrlq m3, 24 |
|
244 |
+ pmullw m0, [pw_3] |
|
245 |
+ punpcklbw m3, m7 |
|
246 |
+ paddw m2, m3 |
|
247 |
+ psubw m0, m2 |
|
248 |
+ mova m2, m5 |
|
249 |
+ punpcklbw m2, m7 |
|
250 |
+ punpckhbw m5, m7 |
|
251 |
+ paddw m6, m2 |
|
252 |
+ pmullw m6, [pw_20] |
|
253 |
+ paddw m0, [PW_ROUND] |
|
254 |
+ paddw m0, m6 |
|
255 |
+ psraw m0, 5 |
|
256 |
+ paddw m3, m5 |
|
257 |
+ pshufw m6, m5, 0xf9 |
|
258 |
+ paddw m6, m4 |
|
259 |
+ pshufw m4, m5, 0xbe |
|
260 |
+ pshufw m5, m5, 0x6f |
|
261 |
+ paddw m4, m1 |
|
262 |
+ paddw m5, m2 |
|
263 |
+ paddw m6, m6 |
|
264 |
+ psubw m4, m6 |
|
265 |
+ pmullw m3, [pw_20] |
|
266 |
+ pmullw m4, [pw_3] |
|
267 |
+ psubw m3, m5 |
|
268 |
+ paddw m4, [PW_ROUND] |
|
269 |
+ paddw m4, m3 |
|
270 |
+ psraw m4, 5 |
|
271 |
+ packuswb m0, m4 |
|
272 |
+ OP_MOV [r0+8], m0, m4 |
|
273 |
+ add r1, r3 |
|
274 |
+ add r0, r2 |
|
275 |
+ dec r4d |
|
276 |
+ jne .loop |
|
277 |
+ REP_RET |
|
278 |
+%endmacro |
|
279 |
+ |
|
280 |
+%macro PUT_OP 2-3 |
|
281 |
+ mova %1, %2 |
|
282 |
+%endmacro |
|
283 |
+ |
|
284 |
+%macro AVG_OP 2-3 |
|
285 |
+ mova %3, %1 |
|
286 |
+ pavgb %2, %3 |
|
287 |
+ mova %1, %2 |
|
288 |
+%endmacro |
|
289 |
+ |
|
290 |
+INIT_MMX mmxext |
|
291 |
+%define PW_ROUND pw_16 |
|
292 |
+%define OP_MOV PUT_OP |
|
293 |
+MPEG4_QPEL16_H_LOWPASS put |
|
294 |
+%define PW_ROUND pw_16 |
|
295 |
+%define OP_MOV AVG_OP |
|
296 |
+MPEG4_QPEL16_H_LOWPASS avg |
|
297 |
+%define PW_ROUND pw_15 |
|
298 |
+%define OP_MOV PUT_OP |
|
299 |
+MPEG4_QPEL16_H_LOWPASS put_no_rnd |
|
300 |
+ |
|
301 |
+ |
|
302 |
+ |
|
303 |
+%macro MPEG4_QPEL8_H_LOWPASS 1 |
|
304 |
+cglobal %1_mpeg4_qpel8_h_lowpass, 5, 5, 0, 8 |
|
305 |
+ movsxdifnidn r2, r2d |
|
306 |
+ movsxdifnidn r3, r3d |
|
307 |
+ pxor m7, m7 |
|
308 |
+.loop: |
|
309 |
+ mova m0, [r1] |
|
310 |
+ mova m1, m0 |
|
311 |
+ mova m2, m0 |
|
312 |
+ punpcklbw m0, m7 |
|
313 |
+ punpckhbw m1, m7 |
|
314 |
+ pshufw m5, m0, 0x90 |
|
315 |
+ pshufw m6, m0, 0x41 |
|
316 |
+ mova m3, m2 |
|
317 |
+ mova m4, m2 |
|
318 |
+ psllq m2, 8 |
|
319 |
+ psllq m3, 16 |
|
320 |
+ psllq m4, 24 |
|
321 |
+ punpckhbw m2, m7 |
|
322 |
+ punpckhbw m3, m7 |
|
323 |
+ punpckhbw m4, m7 |
|
324 |
+ paddw m5, m3 |
|
325 |
+ paddw m6, m2 |
|
326 |
+ paddw m5, m5 |
|
327 |
+ psubw m6, m5 |
|
328 |
+ pshufw m5, m0, 0x6 |
|
329 |
+ pmullw m6, [pw_3] |
|
330 |
+ paddw m0, m4 |
|
331 |
+ paddw m5, m1 |
|
332 |
+ pmullw m0, [pw_20] |
|
333 |
+ psubw m0, m5 |
|
334 |
+ paddw m6, [PW_ROUND] |
|
335 |
+ paddw m0, m6 |
|
336 |
+ psraw m0, 5 |
|
337 |
+ movh m5, [r1+5] |
|
338 |
+ punpcklbw m5, m7 |
|
339 |
+ pshufw m6, m5, 0xf9 |
|
340 |
+ paddw m1, m5 |
|
341 |
+ paddw m2, m6 |
|
342 |
+ pshufw m6, m5, 0xbe |
|
343 |
+ pshufw m5, m5, 0x6f |
|
344 |
+ paddw m3, m6 |
|
345 |
+ paddw m4, m5 |
|
346 |
+ paddw m2, m2 |
|
347 |
+ psubw m3, m2 |
|
348 |
+ pmullw m1, [pw_20] |
|
349 |
+ pmullw m3, [pw_3] |
|
350 |
+ psubw m3, m4 |
|
351 |
+ paddw m1, [PW_ROUND] |
|
352 |
+ paddw m3, m1 |
|
353 |
+ psraw m3, 5 |
|
354 |
+ packuswb m0, m3 |
|
355 |
+ OP_MOV [r0], m0, m4 |
|
356 |
+ add r1, r3 |
|
357 |
+ add r0, r2 |
|
358 |
+ dec r4d |
|
359 |
+ jne .loop |
|
360 |
+ REP_RET |
|
361 |
+%endmacro |
|
362 |
+ |
|
363 |
+INIT_MMX mmxext |
|
364 |
+%define PW_ROUND pw_16 |
|
365 |
+%define OP_MOV PUT_OP |
|
366 |
+MPEG4_QPEL8_H_LOWPASS put |
|
367 |
+%define PW_ROUND pw_16 |
|
368 |
+%define OP_MOV AVG_OP |
|
369 |
+MPEG4_QPEL8_H_LOWPASS avg |
|
370 |
+%define PW_ROUND pw_15 |
|
371 |
+%define OP_MOV PUT_OP |
|
372 |
+MPEG4_QPEL8_H_LOWPASS put_no_rnd |
|
373 |
+ |
|
374 |
+ |
|
375 |
+ |
|
376 |
+%macro QPEL_V_LOW 5 |
|
377 |
+ paddw m0, m1 |
|
378 |
+ mova m4, [pw_20] |
|
379 |
+ pmullw m4, m0 |
|
380 |
+ mova m0, %4 |
|
381 |
+ mova m5, %1 |
|
382 |
+ paddw m5, m0 |
|
383 |
+ psubw m4, m5 |
|
384 |
+ mova m5, %2 |
|
385 |
+ mova m6, %3 |
|
386 |
+ paddw m5, m3 |
|
387 |
+ paddw m6, m2 |
|
388 |
+ paddw m6, m6 |
|
389 |
+ psubw m5, m6 |
|
390 |
+ pmullw m5, [pw_3] |
|
391 |
+ paddw m4, [PW_ROUND] |
|
392 |
+ paddw m5, m4 |
|
393 |
+ psraw m5, 5 |
|
394 |
+ packuswb m5, m5 |
|
395 |
+ OP_MOV %5, m5, m7 |
|
396 |
+ SWAP 0,1,2,3 |
|
397 |
+%endmacro |
|
398 |
+ |
|
399 |
+%macro MPEG4_QPEL16_V_LOWPASS 1 |
|
400 |
+cglobal %1_mpeg4_qpel16_v_lowpass, 4, 6, 0, 544 |
|
401 |
+ movsxdifnidn r2, r2d |
|
402 |
+ movsxdifnidn r3, r3d |
|
403 |
+ |
|
404 |
+ mov r4d, 17 |
|
405 |
+ mov r5, rsp |
|
406 |
+ pxor m7, m7 |
|
407 |
+.looph: |
|
408 |
+ mova m0, [r1] |
|
409 |
+ mova m1, [r1] |
|
410 |
+ mova m2, [r1+8] |
|
411 |
+ mova m3, [r1+8] |
|
412 |
+ punpcklbw m0, m7 |
|
413 |
+ punpckhbw m1, m7 |
|
414 |
+ punpcklbw m2, m7 |
|
415 |
+ punpckhbw m3, m7 |
|
416 |
+ mova [r5], m0 |
|
417 |
+ mova [r5+0x88], m1 |
|
418 |
+ mova [r5+0x110], m2 |
|
419 |
+ mova [r5+0x198], m3 |
|
420 |
+ add r5, 8 |
|
421 |
+ add r1, r3 |
|
422 |
+ dec r4d |
|
423 |
+ jne .looph |
|
424 |
+ |
|
425 |
+ |
|
426 |
+ ; NOTE: r1 CHANGES VALUES: r1 -> 4 - 14*dstStride |
|
427 |
+ mov r4d, 4 |
|
428 |
+ mov r1, 4 |
|
429 |
+ neg r2 |
|
430 |
+ lea r1, [r1+r2*8] |
|
431 |
+ lea r1, [r1+r2*4] |
|
432 |
+ lea r1, [r1+r2*2] |
|
433 |
+ neg r2 |
|
434 |
+ mov r5, rsp |
|
435 |
+.loopv: |
|
436 |
+ pxor m7, m7 |
|
437 |
+ mova m0, [r5+ 0x0] |
|
438 |
+ mova m1, [r5+ 0x8] |
|
439 |
+ mova m2, [r5+0x10] |
|
440 |
+ mova m3, [r5+0x18] |
|
441 |
+ QPEL_V_LOW [r5+0x10], [r5+ 0x8], [r5+ 0x0], [r5+0x20], [r0] |
|
442 |
+ QPEL_V_LOW [r5+ 0x8], [r5+ 0x0], [r5+ 0x0], [r5+0x28], [r0+r2] |
|
443 |
+ lea r0, [r0+r2*2] |
|
444 |
+ QPEL_V_LOW [r5+ 0x0], [r5+ 0x0], [r5+ 0x8], [r5+0x30], [r0] |
|
445 |
+ QPEL_V_LOW [r5+ 0x0], [r5+ 0x8], [r5+0x10], [r5+0x38], [r0+r2] |
|
446 |
+ lea r0, [r0+r2*2] |
|
447 |
+ QPEL_V_LOW [r5+ 0x8], [r5+0x10], [r5+0x18], [r5+0x40], [r0] |
|
448 |
+ QPEL_V_LOW [r5+0x10], [r5+0x18], [r5+0x20], [r5+0x48], [r0+r2] |
|
449 |
+ lea r0, [r0+r2*2] |
|
450 |
+ QPEL_V_LOW [r5+0x18], [r5+0x20], [r5+0x28], [r5+0x50], [r0] |
|
451 |
+ QPEL_V_LOW [r5+0x20], [r5+0x28], [r5+0x30], [r5+0x58], [r0+r2] |
|
452 |
+ lea r0, [r0+r2*2] |
|
453 |
+ QPEL_V_LOW [r5+0x28], [r5+0x30], [r5+0x38], [r5+0x60], [r0] |
|
454 |
+ QPEL_V_LOW [r5+0x30], [r5+0x38], [r5+0x40], [r5+0x68], [r0+r2] |
|
455 |
+ lea r0, [r0+r2*2] |
|
456 |
+ QPEL_V_LOW [r5+0x38], [r5+0x40], [r5+0x48], [r5+0x70], [r0] |
|
457 |
+ QPEL_V_LOW [r5+0x40], [r5+0x48], [r5+0x50], [r5+0x78], [r0+r2] |
|
458 |
+ lea r0, [r0+r2*2] |
|
459 |
+ QPEL_V_LOW [r5+0x48], [r5+0x50], [r5+0x58], [r5+0x80], [r0] |
|
460 |
+ QPEL_V_LOW [r5+0x50], [r5+0x58], [r5+0x60], [r5+0x80], [r0+r2] |
|
461 |
+ lea r0, [r0+r2*2] |
|
462 |
+ QPEL_V_LOW [r5+0x58], [r5+0x60], [r5+0x68], [r5+0x78], [r0] |
|
463 |
+ QPEL_V_LOW [r5+0x60], [r5+0x68], [r5+0x70], [r5+0x70], [r0+r2] |
|
464 |
+ |
|
465 |
+ add r5, 0x88 |
|
466 |
+ add r0, r1 |
|
467 |
+ dec r4d |
|
468 |
+ jne .loopv |
|
469 |
+ REP_RET |
|
470 |
+%endmacro |
|
471 |
+ |
|
472 |
+%macro PUT_OPH 2-3 |
|
473 |
+ movh %1, %2 |
|
474 |
+%endmacro |
|
475 |
+ |
|
476 |
+%macro AVG_OPH 2-3 |
|
477 |
+ movh %3, %1 |
|
478 |
+ pavgb %2, %3 |
|
479 |
+ movh %1, %2 |
|
480 |
+%endmacro |
|
481 |
+ |
|
482 |
+INIT_MMX mmxext |
|
483 |
+%define PW_ROUND pw_16 |
|
484 |
+%define OP_MOV PUT_OPH |
|
485 |
+MPEG4_QPEL16_V_LOWPASS put |
|
486 |
+%define PW_ROUND pw_16 |
|
487 |
+%define OP_MOV AVG_OPH |
|
488 |
+MPEG4_QPEL16_V_LOWPASS avg |
|
489 |
+%define PW_ROUND pw_15 |
|
490 |
+%define OP_MOV PUT_OPH |
|
491 |
+MPEG4_QPEL16_V_LOWPASS put_no_rnd |
|
492 |
+ |
|
493 |
+ |
|
494 |
+ |
|
495 |
+%macro MPEG4_QPEL8_V_LOWPASS 1 |
|
496 |
+cglobal %1_mpeg4_qpel8_v_lowpass, 4, 6, 0, 288 |
|
497 |
+ movsxdifnidn r2, r2d |
|
498 |
+ movsxdifnidn r3, r3d |
|
499 |
+ |
|
500 |
+ mov r4d, 9 |
|
501 |
+ mov r5, rsp |
|
502 |
+ pxor m7, m7 |
|
503 |
+.looph: |
|
504 |
+ mova m0, [r1] |
|
505 |
+ mova m1, [r1] |
|
506 |
+ punpcklbw m0, m7 |
|
507 |
+ punpckhbw m1, m7 |
|
508 |
+ mova [r5], m0 |
|
509 |
+ mova [r5+0x48], m1 |
|
510 |
+ add r5, 8 |
|
511 |
+ add r1, r3 |
|
512 |
+ dec r4d |
|
513 |
+ jne .looph |
|
514 |
+ |
|
515 |
+ |
|
516 |
+ ; NOTE: r1 CHANGES VALUES: r1 -> 4 - 6*dstStride |
|
517 |
+ mov r4d, 2 |
|
518 |
+ mov r1, 4 |
|
519 |
+ neg r2 |
|
520 |
+ lea r1, [r1+r2*4] |
|
521 |
+ lea r1, [r1+r2*2] |
|
522 |
+ neg r2 |
|
523 |
+ mov r5, rsp |
|
524 |
+.loopv: |
|
525 |
+ pxor m7, m7 |
|
526 |
+ mova m0, [r5+ 0x0] |
|
527 |
+ mova m1, [r5+ 0x8] |
|
528 |
+ mova m2, [r5+0x10] |
|
529 |
+ mova m3, [r5+0x18] |
|
530 |
+ QPEL_V_LOW [r5+0x10], [r5+ 0x8], [r5+ 0x0], [r5+0x20], [r0] |
|
531 |
+ QPEL_V_LOW [r5+ 0x8], [r5+ 0x0], [r5+ 0x0], [r5+0x28], [r0+r2] |
|
532 |
+ lea r0, [r0+r2*2] |
|
533 |
+ QPEL_V_LOW [r5+ 0x0], [r5+ 0x0], [r5+ 0x8], [r5+0x30], [r0] |
|
534 |
+ QPEL_V_LOW [r5+ 0x0], [r5+ 0x8], [r5+0x10], [r5+0x38], [r0+r2] |
|
535 |
+ lea r0, [r0+r2*2] |
|
536 |
+ QPEL_V_LOW [r5+ 0x8], [r5+0x10], [r5+0x18], [r5+0x40], [r0] |
|
537 |
+ QPEL_V_LOW [r5+0x10], [r5+0x18], [r5+0x20], [r5+0x40], [r0+r2] |
|
538 |
+ lea r0, [r0+r2*2] |
|
539 |
+ QPEL_V_LOW [r5+0x18], [r5+0x20], [r5+0x28], [r5+0x38], [r0] |
|
540 |
+ QPEL_V_LOW [r5+0x20], [r5+0x28], [r5+0x30], [r5+0x30], [r0+r2] |
|
541 |
+ |
|
542 |
+ add r5, 0x48 |
|
543 |
+ add r0, r1 |
|
544 |
+ dec r4d |
|
545 |
+ jne .loopv |
|
546 |
+ REP_RET |
|
547 |
+%endmacro |
|
548 |
+ |
|
549 |
+INIT_MMX mmxext |
|
550 |
+%define PW_ROUND pw_16 |
|
551 |
+%define OP_MOV PUT_OPH |
|
552 |
+MPEG4_QPEL8_V_LOWPASS put |
|
553 |
+%define PW_ROUND pw_16 |
|
554 |
+%define OP_MOV AVG_OPH |
|
555 |
+MPEG4_QPEL8_V_LOWPASS avg |
|
556 |
+%define PW_ROUND pw_15 |
|
557 |
+%define OP_MOV PUT_OPH |
|
558 |
+MPEG4_QPEL8_V_LOWPASS put_no_rnd |
0 | 559 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,501 @@ |
0 |
+/* |
|
1 |
+ * quarterpel DSP functions |
|
2 |
+ * |
|
3 |
+ * This file is part of Libav. |
|
4 |
+ * |
|
5 |
+ * Libav is free software; you can redistribute it and/or |
|
6 |
+ * modify it under the terms of the GNU Lesser General Public |
|
7 |
+ * License as published by the Free Software Foundation; either |
|
8 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
9 |
+ * |
|
10 |
+ * Libav is distributed in the hope that it will be useful, |
|
11 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
12 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
13 |
+ * Lesser General Public License for more details. |
|
14 |
+ * |
|
15 |
+ * You should have received a copy of the GNU Lesser General Public |
|
16 |
+ * License along with Libav; if not, write to the Free Software |
|
17 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
18 |
+ */ |
|
19 |
+ |
|
20 |
+#include <stddef.h> |
|
21 |
+#include <stdint.h> |
|
22 |
+ |
|
23 |
+#include "config.h" |
|
24 |
+#include "libavutil/attributes.h" |
|
25 |
+#include "libavutil/cpu.h" |
|
26 |
+#include "libavutil/x86/cpu.h" |
|
27 |
+#include "libavcodec/pixels.h" |
|
28 |
+#include "libavcodec/qpeldsp.h" |
|
29 |
+#include "fpel.h" |
|
30 |
+ |
|
31 |
/*
 * Prototypes for the MMXEXT routines used by the wrappers in this file.
 * None of them is defined here; they are expected to be provided by the
 * external assembly objects.
 */

/* Two-source ("l2") pixel helpers, 8 pixels wide. */
void ff_put_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
                              int dstStride, int src1Stride, int h);
void ff_put_no_rnd_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
                                     int dstStride, int src1Stride, int h);
void ff_avg_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
                              int dstStride, int src1Stride, int h);

/* Two-source ("l2") pixel helpers, 16 pixels wide. */
void ff_put_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
                               int dstStride, int src1Stride, int h);
void ff_avg_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
                               int dstStride, int src1Stride, int h);
void ff_put_no_rnd_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
                                      int dstStride, int src1Stride, int h);

/* Horizontal lowpass, 16 pixels wide; h is the number of rows to process. */
void ff_put_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
                                          int dstStride, int srcStride, int h);
void ff_avg_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
                                          int dstStride, int srcStride, int h);
void ff_put_no_rnd_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
                                                 int dstStride, int srcStride,
                                                 int h);

/* Horizontal lowpass, 8 pixels wide; h is the number of rows to process. */
void ff_put_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
                                         int dstStride, int srcStride, int h);
void ff_avg_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
                                         int dstStride, int srcStride, int h);
void ff_put_no_rnd_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
                                                int dstStride, int srcStride,
                                                int h);

/* Vertical lowpass, 16 pixels wide (no row-count argument). */
void ff_put_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
                                          int dstStride, int srcStride);
void ff_avg_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
                                          int dstStride, int srcStride);
void ff_put_no_rnd_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
                                                 int dstStride, int srcStride);

/* Vertical lowpass, 8 pixels wide (no row-count argument). */
void ff_put_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
                                         int dstStride, int srcStride);
void ff_avg_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
                                         int dstStride, int srcStride);
void ff_put_no_rnd_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
                                                int dstStride, int srcStride);

/* The plain-copy "no_rnd" names are aliases of the regular put routines. */
#define ff_put_no_rnd_pixels16_mmxext ff_put_pixels16_mmxext
#define ff_put_no_rnd_pixels8_mmxext  ff_put_pixels8_mmxext
|
72 |
+ |
|
73 |
+#if HAVE_YASM |
|
74 |
+ |
|
75 |
+CALL_2X_PIXELS(ff_avg_pixels16_mmxext, ff_avg_pixels8_mmxext, 8) |
|
76 |
+CALL_2X_PIXELS(ff_put_pixels16_mmxext, ff_put_pixels8_mmxext, 8) |
|
77 |
+ |
|
78 |
/*
 * QPEL_OP(OPNAME, RND, MMX)
 *
 * Emits 32 static wrappers, OPNAME qpel8_mcXY_ MMX and OPNAME qpel16_mcXY_ MMX
 * for X, Y in 0..3, each taking (dst, src, stride).
 *
 *   OPNAME  name prefix of the final store routines ("put_", "avg_", ...)
 *   RND     infix pasted after "ff_put" for the intermediate passes
 *           ("_" or "_no_rnd_")
 *   MMX     instruction-set suffix of the routines being called
 *
 * As can be read from the bodies: a non-zero X involves the horizontal
 * lowpass, a non-zero Y involves the vertical lowpass, and the odd digits
 * (1, 3) additionally merge with a second source through the *_l2 helpers
 * (digit 3 uses the source shifted by one pixel / one row).
 *
 * Scratch layout of the two-pass 8x8 variants: 8 uint64_t (64 bytes) for the
 * vertical result followed by 9 uint64_t holding 9 rows of 8 bytes from the
 * horizontal pass.  The 16x16 variants use 256 bytes followed by 17 rows of
 * 16 bytes.
 */
#define QPEL_OP(OPNAME, RND, MMX) \
/* ------------------------------ 8x8 ------------------------------ */ \
static void OPNAME ## qpel8_mc00_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## pixels8_ ## MMX(dst, src, stride, 8); \
} \
 \
static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t buf[8]; \
    uint8_t *const lp = (uint8_t *) buf; \
 \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(lp, src, 8, stride, 8); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, lp, stride, stride, 8); \
} \
 \
static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride, \
                                                   stride, 8); \
} \
 \
static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t buf[8]; \
    uint8_t *const lp = (uint8_t *) buf; \
 \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(lp, src, 8, stride, 8); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + 1, lp, stride, \
                                        stride, 8); \
} \
 \
static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t buf[8]; \
    uint8_t *const lp = (uint8_t *) buf; \
 \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(lp, src, 8, stride); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, lp, stride, stride, 8); \
} \
 \
static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src, stride, stride); \
} \
 \
static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t buf[8]; \
    uint8_t *const lp = (uint8_t *) buf; \
 \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(lp, src, 8, stride); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + stride, lp, stride, \
                                        stride, 8); \
} \
 \
static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t buf[8 + 9]; \
    uint8_t *const hv = (uint8_t *) buf; \
    uint8_t *const hp = hv + 64; \
 \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(hp, src, 8, stride, 9); \
    ff_put ## RND ## pixels8_l2_ ## MMX(hp, src, hp, 8, stride, 9); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(hv, hp, 8, 8); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, hp, hv, stride, 8, 8); \
} \
 \
static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t buf[8 + 9]; \
    uint8_t *const hv = (uint8_t *) buf; \
    uint8_t *const hp = hv + 64; \
 \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(hp, src, 8, stride, 9); \
    ff_put ## RND ## pixels8_l2_ ## MMX(hp, src + 1, hp, 8, stride, 9); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(hv, hp, 8, 8); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, hp, hv, stride, 8, 8); \
} \
 \
static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t buf[8 + 9]; \
    uint8_t *const hv = (uint8_t *) buf; \
    uint8_t *const hp = hv + 64; \
 \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(hp, src, 8, stride, 9); \
    ff_put ## RND ## pixels8_l2_ ## MMX(hp, src, hp, 8, stride, 9); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(hv, hp, 8, 8); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, hp + 8, hv, stride, 8, 8); \
} \
 \
static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t buf[8 + 9]; \
    uint8_t *const hv = (uint8_t *) buf; \
    uint8_t *const hp = hv + 64; \
 \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(hp, src, 8, stride, 9); \
    ff_put ## RND ## pixels8_l2_ ## MMX(hp, src + 1, hp, 8, stride, 9); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(hv, hp, 8, 8); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, hp + 8, hv, stride, 8, 8); \
} \
 \
static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t buf[8 + 9]; \
    uint8_t *const hv = (uint8_t *) buf; \
    uint8_t *const hp = hv + 64; \
 \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(hp, src, 8, stride, 9); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(hv, hp, 8, 8); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, hp, hv, stride, 8, 8); \
} \
 \
static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t buf[8 + 9]; \
    uint8_t *const hv = (uint8_t *) buf; \
    uint8_t *const hp = hv + 64; \
 \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(hp, src, 8, stride, 9); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(hv, hp, 8, 8); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, hp + 8, hv, stride, 8, 8); \
} \
 \
static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t buf[8 + 9]; \
    uint8_t *const hp = (uint8_t *) buf; \
 \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(hp, src, 8, stride, 9); \
    ff_put ## RND ## pixels8_l2_ ## MMX(hp, src, hp, 8, stride, 9); \
    ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, hp, stride, 8); \
} \
 \
static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t buf[8 + 9]; \
    uint8_t *const hp = (uint8_t *) buf; \
 \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(hp, src, 8, stride, 9); \
    ff_put ## RND ## pixels8_l2_ ## MMX(hp, src + 1, hp, 8, stride, 9); \
    ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, hp, stride, 8); \
} \
 \
static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t buf[9]; \
    uint8_t *const hp = (uint8_t *) buf; \
 \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(hp, src, 8, stride, 9); \
    ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, hp, stride, 8); \
} \
 \
/* ----------------------------- 16x16 ----------------------------- */ \
static void OPNAME ## qpel16_mc00_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## pixels16_ ## MMX(dst, src, stride, 16); \
} \
 \
static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t buf[32]; \
    uint8_t *const lp = (uint8_t *) buf; \
 \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(lp, src, 16, stride, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, lp, stride, stride, 16); \
} \
 \
static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src, stride, \
                                                    stride, 16); \
} \
 \
static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t buf[32]; \
    uint8_t *const lp = (uint8_t *) buf; \
 \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(lp, src, 16, stride, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src + 1, lp, stride, \
                                         stride, 16); \
} \
 \
static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t buf[32]; \
    uint8_t *const lp = (uint8_t *) buf; \
 \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(lp, src, 16, stride); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, lp, stride, stride, 16); \
} \
 \
static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src, stride, stride); \
} \
 \
static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t buf[32]; \
    uint8_t *const lp = (uint8_t *) buf; \
 \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(lp, src, 16, stride); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src + stride, lp, stride, \
                                         stride, 16); \
} \
 \
static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t buf[16 * 2 + 17 * 2]; \
    uint8_t *const hv = (uint8_t *) buf; \
    uint8_t *const hp = hv + 256; \
 \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(hp, src, 16, stride, 17); \
    ff_put ## RND ## pixels16_l2_ ## MMX(hp, src, hp, 16, stride, 17); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(hv, hp, 16, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, hp, hv, stride, 16, 16); \
} \
 \
static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t buf[16 * 2 + 17 * 2]; \
    uint8_t *const hv = (uint8_t *) buf; \
    uint8_t *const hp = hv + 256; \
 \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(hp, src, 16, stride, 17); \
    ff_put ## RND ## pixels16_l2_ ## MMX(hp, src + 1, hp, 16, stride, 17); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(hv, hp, 16, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, hp, hv, stride, 16, 16); \
} \
 \
static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t buf[16 * 2 + 17 * 2]; \
    uint8_t *const hv = (uint8_t *) buf; \
    uint8_t *const hp = hv + 256; \
 \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(hp, src, 16, stride, 17); \
    ff_put ## RND ## pixels16_l2_ ## MMX(hp, src, hp, 16, stride, 17); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(hv, hp, 16, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, hp + 16, hv, stride, 16, 16); \
} \
 \
static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t buf[16 * 2 + 17 * 2]; \
    uint8_t *const hv = (uint8_t *) buf; \
    uint8_t *const hp = hv + 256; \
 \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(hp, src, 16, stride, 17); \
    ff_put ## RND ## pixels16_l2_ ## MMX(hp, src + 1, hp, 16, stride, 17); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(hv, hp, 16, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, hp + 16, hv, stride, 16, 16); \
} \
 \
static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t buf[16 * 2 + 17 * 2]; \
    uint8_t *const hv = (uint8_t *) buf; \
    uint8_t *const hp = hv + 256; \
 \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(hp, src, 16, stride, 17); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(hv, hp, 16, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, hp, hv, stride, 16, 16); \
} \
 \
static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t buf[16 * 2 + 17 * 2]; \
    uint8_t *const hv = (uint8_t *) buf; \
    uint8_t *const hp = hv + 256; \
 \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(hp, src, 16, stride, 17); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(hv, hp, 16, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, hp + 16, hv, stride, 16, 16); \
} \
 \
static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t buf[17 * 2]; \
    uint8_t *const hp = (uint8_t *) buf; \
 \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(hp, src, 16, stride, 17); \
    ff_put ## RND ## pixels16_l2_ ## MMX(hp, src, hp, 16, stride, 17); \
    ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, hp, stride, 16); \
} \
 \
static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t buf[17 * 2]; \
    uint8_t *const hp = (uint8_t *) buf; \
 \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(hp, src, 16, stride, 17); \
    ff_put ## RND ## pixels16_l2_ ## MMX(hp, src + 1, hp, 16, stride, 17); \
    ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, hp, stride, 16); \
} \
 \
static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t buf[17 * 2]; \
    uint8_t *const hp = (uint8_t *) buf; \
 \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(hp, src, 16, stride, 17); \
    ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, hp, stride, 16); \
}
|
458 |
+ |
|
459 |
+QPEL_OP(put_, _, mmxext) |
|
460 |
+QPEL_OP(avg_, _, mmxext) |
|
461 |
+QPEL_OP(put_no_rnd_, _no_rnd_, mmxext) |
|
462 |
+ |
|
463 |
+#endif /* HAVE_YASM */ |
|
464 |
+ |
|
465 |
/*
 * SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX)
 *
 * Fills the 16 entries of c->PFX_pixels_tab[IDX] with the functions named
 * PREFIX PFX SIZE _mcXY_ CPU.  Entry number is X + 4 * Y (mc10 -> 1,
 * mc01 -> 4, mc33 -> 15).  Relies on a variable named `c` being in scope at
 * the point of use.
 */
#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) \
do { \
    /* Y = 0 */ \
    c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \
    /* Y = 1 */ \
    c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \
    /* Y = 2 */ \
    c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \
    /* Y = 3 */ \
    c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU; \
} while (0)
|
484 |
+ |
|
485 |
+av_cold void ff_qpeldsp_init_x86(QpelDSPContext *c) |
|
486 |
+{ |
|
487 |
+ int cpu_flags = av_get_cpu_flags(); |
|
488 |
+ |
|
489 |
+ if (X86_MMXEXT(cpu_flags)) { |
|
490 |
+#if HAVE_MMXEXT_EXTERNAL |
|
491 |
+ SET_QPEL_FUNCS(avg_qpel, 0, 16, mmxext, ); |
|
492 |
+ SET_QPEL_FUNCS(avg_qpel, 1, 8, mmxext, ); |
|
493 |
+ |
|
494 |
+ SET_QPEL_FUNCS(put_qpel, 0, 16, mmxext, ); |
|
495 |
+ SET_QPEL_FUNCS(put_qpel, 1, 8, mmxext, ); |
|
496 |
+ SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmxext, ); |
|
497 |
+ SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmxext, ); |
|
498 |
+#endif /* HAVE_MMXEXT_EXTERNAL */ |
|
499 |
+ } |
|
500 |
+} |