Originally committed as revision 2896 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc
Michael Niedermayer authored on 2001/11/14 11:46:58... | ... |
@@ -35,7 +35,7 @@ LinIpolDeinterlace e E E* |
35 | 35 |
CubicIpolDeinterlace a e e* |
36 | 36 |
LinBlendDeinterlace e E E* |
37 | 37 |
MedianDeinterlace# Ec Ec |
38 |
-TempDeNoiser# a |
|
38 |
+TempDeNoiser# E e e |
|
39 | 39 |
|
40 | 40 |
* I don't have a 3DNow! CPU -> it's untested, but no one said it doesn't work, so it seems to work |
41 | 41 |
# more or less self-invented filters, so the exactness isn't too meaningful |
... | ... |
@@ -61,6 +61,7 @@ split this huge file |
61 | 61 |
border remover |
62 | 62 |
optimize c versions |
63 | 63 |
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks |
64 |
+smart blur |
|
64 | 65 |
... |
65 | 66 |
|
66 | 67 |
Notes: |
... | ... |
@@ -2592,10 +2593,294 @@ static inline void transpose2(uint8_t *dst, int dstStride, uint8_t *src) |
2592 | 2592 |
); |
2593 | 2593 |
} |
2594 | 2594 |
#endif |
2595 |
+//static int test=0; |
|
2595 | 2596 |
|
2596 | 2597 |
static void inline tempNoiseReducer(uint8_t *src, int stride, |
2597 | 2598 |
uint8_t *tempBlured, int *maxNoise) |
2598 | 2599 |
{ |
2600 |
+#define FAST_L2_DIFF |
|
2601 |
+//#define L1_DIFF //u should change the thresholds too if u try that one |
|
2602 |
+#if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
|
2603 |
+ asm volatile( |
|
2604 |
+ "leal (%2, %2, 2), %%eax \n\t" // 3*stride |
|
2605 |
+ "leal (%2, %2, 4), %%ebx \n\t" // 5*stride |
|
2606 |
+ "leal (%%ebx, %2, 2), %%ecx \n\t" // 7*stride |
|
2607 |
+// 0 1 2 3 4 5 6 7 8 9 |
|
2608 |
+// %x %x+%2 %x+2%2 %x+eax %x+4%2 %x+ebx %x+2eax %x+ecx %x+8%2 |
|
2609 |
+//FIXME reorder? |
|
2610 |
+#ifdef L1_DIFF //needs mmx2 |
|
2611 |
+ "movq (%0), %%mm0 \n\t" // L0 |
|
2612 |
+ "psadbw (%1), %%mm0 \n\t" // |L0-R0| |
|
2613 |
+ "movq (%0, %2), %%mm1 \n\t" // L1 |
|
2614 |
+ "psadbw (%1, %2), %%mm1 \n\t" // |L1-R1| |
|
2615 |
+ "movq (%0, %2, 2), %%mm2 \n\t" // L2 |
|
2616 |
+ "psadbw (%1, %2, 2), %%mm2 \n\t" // |L2-R2| |
|
2617 |
+ "movq (%0, %%eax), %%mm3 \n\t" // L3 |
|
2618 |
+ "psadbw (%1, %%eax), %%mm3 \n\t" // |L3-R3| |
|
2619 |
+ |
|
2620 |
+ "movq (%0, %2, 4), %%mm4 \n\t" // L4 |
|
2621 |
+ "paddw %%mm1, %%mm0 \n\t" |
|
2622 |
+ "psadbw (%1, %2, 4), %%mm4 \n\t" // |L4-R4| |
|
2623 |
+ "movq (%0, %%ebx), %%mm5 \n\t" // L5 |
|
2624 |
+ "paddw %%mm2, %%mm0 \n\t" |
|
2625 |
+ "psadbw (%1, %%ebx), %%mm5 \n\t" // |L5-R5| |
|
2626 |
+ "movq (%0, %%eax, 2), %%mm6 \n\t" // L6 |
|
2627 |
+ "paddw %%mm3, %%mm0 \n\t" |
|
2628 |
+ "psadbw (%1, %%eax, 2), %%mm6 \n\t" // |L6-R6| |
|
2629 |
+ "movq (%0, %%ecx), %%mm7 \n\t" // L7 |
|
2630 |
+ "paddw %%mm4, %%mm0 \n\t" |
|
2631 |
+ "psadbw (%1, %%ecx), %%mm7 \n\t" // |L7-R7| |
|
2632 |
+ "paddw %%mm5, %%mm6 \n\t" |
|
2633 |
+ "paddw %%mm7, %%mm6 \n\t" |
|
2634 |
+ "paddw %%mm6, %%mm0 \n\t" |
|
2635 |
+#elif defined (FAST_L2_DIFF) |
|
2636 |
+ "pcmpeqb %%mm7, %%mm7 \n\t" |
|
2637 |
+ "movq b80, %%mm6 \n\t" |
|
2638 |
+ "pxor %%mm0, %%mm0 \n\t" |
|
2639 |
+#define L2_DIFF_CORE(a, b)\ |
|
2640 |
+ "movq " #a ", %%mm5 \n\t"\ |
|
2641 |
+ "movq " #b ", %%mm2 \n\t"\ |
|
2642 |
+ "pxor %%mm7, %%mm2 \n\t"\ |
|
2643 |
+ PAVGB(%%mm2, %%mm5)\ |
|
2644 |
+ "paddb %%mm6, %%mm5 \n\t"\ |
|
2645 |
+ "movq %%mm5, %%mm2 \n\t"\ |
|
2646 |
+ "psllw $8, %%mm5 \n\t"\ |
|
2647 |
+ "pmaddwd %%mm5, %%mm5 \n\t"\ |
|
2648 |
+ "pmaddwd %%mm2, %%mm2 \n\t"\ |
|
2649 |
+ "paddd %%mm2, %%mm5 \n\t"\ |
|
2650 |
+ "psrld $14, %%mm5 \n\t"\ |
|
2651 |
+ "paddd %%mm5, %%mm0 \n\t" |
|
2652 |
+ |
|
2653 |
+L2_DIFF_CORE((%0), (%1)) |
|
2654 |
+L2_DIFF_CORE((%0, %2), (%1, %2)) |
|
2655 |
+L2_DIFF_CORE((%0, %2, 2), (%1, %2, 2)) |
|
2656 |
+L2_DIFF_CORE((%0, %%eax), (%1, %%eax)) |
|
2657 |
+L2_DIFF_CORE((%0, %2, 4), (%1, %2, 4)) |
|
2658 |
+L2_DIFF_CORE((%0, %%ebx), (%1, %%ebx)) |
|
2659 |
+L2_DIFF_CORE((%0, %%eax,2), (%1, %%eax,2)) |
|
2660 |
+L2_DIFF_CORE((%0, %%ecx), (%1, %%ecx)) |
|
2661 |
+ |
|
2662 |
+#else |
|
2663 |
+ "pxor %%mm7, %%mm7 \n\t" |
|
2664 |
+ "pxor %%mm0, %%mm0 \n\t" |
|
2665 |
+#define L2_DIFF_CORE(a, b)\ |
|
2666 |
+ "movq " #a ", %%mm5 \n\t"\ |
|
2667 |
+ "movq " #b ", %%mm2 \n\t"\ |
|
2668 |
+ "movq %%mm5, %%mm1 \n\t"\ |
|
2669 |
+ "movq %%mm2, %%mm3 \n\t"\ |
|
2670 |
+ "punpcklbw %%mm7, %%mm5 \n\t"\ |
|
2671 |
+ "punpckhbw %%mm7, %%mm1 \n\t"\ |
|
2672 |
+ "punpcklbw %%mm7, %%mm2 \n\t"\ |
|
2673 |
+ "punpckhbw %%mm7, %%mm3 \n\t"\ |
|
2674 |
+ "psubw %%mm2, %%mm5 \n\t"\ |
|
2675 |
+ "psubw %%mm3, %%mm1 \n\t"\ |
|
2676 |
+ "pmaddwd %%mm5, %%mm5 \n\t"\ |
|
2677 |
+ "pmaddwd %%mm1, %%mm1 \n\t"\ |
|
2678 |
+ "paddd %%mm1, %%mm5 \n\t"\ |
|
2679 |
+ "paddd %%mm5, %%mm0 \n\t" |
|
2680 |
+ |
|
2681 |
+L2_DIFF_CORE((%0), (%1)) |
|
2682 |
+L2_DIFF_CORE((%0, %2), (%1, %2)) |
|
2683 |
+L2_DIFF_CORE((%0, %2, 2), (%1, %2, 2)) |
|
2684 |
+L2_DIFF_CORE((%0, %%eax), (%1, %%eax)) |
|
2685 |
+L2_DIFF_CORE((%0, %2, 4), (%1, %2, 4)) |
|
2686 |
+L2_DIFF_CORE((%0, %%ebx), (%1, %%ebx)) |
|
2687 |
+L2_DIFF_CORE((%0, %%eax,2), (%1, %%eax,2)) |
|
2688 |
+L2_DIFF_CORE((%0, %%ecx), (%1, %%ecx)) |
|
2689 |
+ |
|
2690 |
+#endif |
|
2691 |
+ |
|
2692 |
+ "movq %%mm0, %%mm4 \n\t" |
|
2693 |
+ "psrlq $32, %%mm0 \n\t" |
|
2694 |
+ "paddd %%mm0, %%mm4 \n\t" |
|
2695 |
+ "movd %%mm4, %%ecx \n\t" |
|
2696 |
+// "movl %3, %%ecx \n\t" |
|
2697 |
+// "movl %%ecx, test \n\t" |
|
2698 |
+// "jmp 4f \n\t" |
|
2699 |
+ "cmpl %4, %%ecx \n\t" |
|
2700 |
+ " jb 2f \n\t" |
|
2701 |
+ "cmpl %5, %%ecx \n\t" |
|
2702 |
+ " jb 1f \n\t" |
|
2703 |
+ |
|
2704 |
+ "leal (%%ebx, %2, 2), %%ecx \n\t" // 7*stride |
|
2705 |
+ "movq (%0), %%mm0 \n\t" // L0 |
|
2706 |
+ "movq (%0, %2), %%mm1 \n\t" // L1 |
|
2707 |
+ "movq (%0, %2, 2), %%mm2 \n\t" // L2 |
|
2708 |
+ "movq (%0, %%eax), %%mm3 \n\t" // L3 |
|
2709 |
+ "movq (%0, %2, 4), %%mm4 \n\t" // L4 |
|
2710 |
+ "movq (%0, %%ebx), %%mm5 \n\t" // L5 |
|
2711 |
+ "movq (%0, %%eax, 2), %%mm6 \n\t" // L6 |
|
2712 |
+ "movq (%0, %%ecx), %%mm7 \n\t" // L7 |
|
2713 |
+ "movq %%mm0, (%1) \n\t" // L0 |
|
2714 |
+ "movq %%mm1, (%1, %2) \n\t" // L1 |
|
2715 |
+ "movq %%mm2, (%1, %2, 2) \n\t" // L2 |
|
2716 |
+ "movq %%mm3, (%1, %%eax) \n\t" // L3 |
|
2717 |
+ "movq %%mm4, (%1, %2, 4) \n\t" // L4 |
|
2718 |
+ "movq %%mm5, (%1, %%ebx) \n\t" // L5 |
|
2719 |
+ "movq %%mm6, (%1, %%eax, 2) \n\t" // L6 |
|
2720 |
+ "movq %%mm7, (%1, %%ecx) \n\t" // L7 |
|
2721 |
+ "jmp 4f \n\t" |
|
2722 |
+ |
|
2723 |
+ "1: \n\t" |
|
2724 |
+ "leal (%%ebx, %2, 2), %%ecx \n\t" // 7*stride |
|
2725 |
+ "movq (%0), %%mm0 \n\t" // L0 |
|
2726 |
+ "pavgb (%1), %%mm0 \n\t" // L0 |
|
2727 |
+ "movq (%0, %2), %%mm1 \n\t" // L1 |
|
2728 |
+ "pavgb (%1, %2), %%mm1 \n\t" // L1 |
|
2729 |
+ "movq (%0, %2, 2), %%mm2 \n\t" // L2 |
|
2730 |
+ "pavgb (%1, %2, 2), %%mm2 \n\t" // L2 |
|
2731 |
+ "movq (%0, %%eax), %%mm3 \n\t" // L3 |
|
2732 |
+ "pavgb (%1, %%eax), %%mm3 \n\t" // L3 |
|
2733 |
+ "movq (%0, %2, 4), %%mm4 \n\t" // L4 |
|
2734 |
+ "pavgb (%1, %2, 4), %%mm4 \n\t" // L4 |
|
2735 |
+ "movq (%0, %%ebx), %%mm5 \n\t" // L5 |
|
2736 |
+ "pavgb (%1, %%ebx), %%mm5 \n\t" // L5 |
|
2737 |
+ "movq (%0, %%eax, 2), %%mm6 \n\t" // L6 |
|
2738 |
+ "pavgb (%1, %%eax, 2), %%mm6 \n\t" // L6 |
|
2739 |
+ "movq (%0, %%ecx), %%mm7 \n\t" // L7 |
|
2740 |
+ "pavgb (%1, %%ecx), %%mm7 \n\t" // L7 |
|
2741 |
+ "movq %%mm0, (%1) \n\t" // R0 |
|
2742 |
+ "movq %%mm1, (%1, %2) \n\t" // R1 |
|
2743 |
+ "movq %%mm2, (%1, %2, 2) \n\t" // R2 |
|
2744 |
+ "movq %%mm3, (%1, %%eax) \n\t" // R3 |
|
2745 |
+ "movq %%mm4, (%1, %2, 4) \n\t" // R4 |
|
2746 |
+ "movq %%mm5, (%1, %%ebx) \n\t" // R5 |
|
2747 |
+ "movq %%mm6, (%1, %%eax, 2) \n\t" // R6 |
|
2748 |
+ "movq %%mm7, (%1, %%ecx) \n\t" // R7 |
|
2749 |
+ "movq %%mm0, (%0) \n\t" // L0 |
|
2750 |
+ "movq %%mm1, (%0, %2) \n\t" // L1 |
|
2751 |
+ "movq %%mm2, (%0, %2, 2) \n\t" // L2 |
|
2752 |
+ "movq %%mm3, (%0, %%eax) \n\t" // L3 |
|
2753 |
+ "movq %%mm4, (%0, %2, 4) \n\t" // L4 |
|
2754 |
+ "movq %%mm5, (%0, %%ebx) \n\t" // L5 |
|
2755 |
+ "movq %%mm6, (%0, %%eax, 2) \n\t" // L6 |
|
2756 |
+ "movq %%mm7, (%0, %%ecx) \n\t" // L7 |
|
2757 |
+ "jmp 4f \n\t" |
|
2758 |
+ |
|
2759 |
+ "2: \n\t" |
|
2760 |
+ "cmpl %3, %%ecx \n\t" |
|
2761 |
+ " jb 3f \n\t" |
|
2762 |
+ |
|
2763 |
+ "leal (%%ebx, %2, 2), %%ecx \n\t" // 7*stride |
|
2764 |
+ "movq (%0), %%mm0 \n\t" // L0 |
|
2765 |
+ "movq (%0, %2), %%mm1 \n\t" // L1 |
|
2766 |
+ "movq (%0, %2, 2), %%mm2 \n\t" // L2 |
|
2767 |
+ "movq (%0, %%eax), %%mm3 \n\t" // L3 |
|
2768 |
+ "movq (%1), %%mm4 \n\t" // R0 |
|
2769 |
+ "movq (%1, %2), %%mm5 \n\t" // R1 |
|
2770 |
+ "movq (%1, %2, 2), %%mm6 \n\t" // R2 |
|
2771 |
+ "movq (%1, %%eax), %%mm7 \n\t" // R3 |
|
2772 |
+ PAVGB(%%mm4, %%mm0) |
|
2773 |
+ PAVGB(%%mm5, %%mm1) |
|
2774 |
+ PAVGB(%%mm6, %%mm2) |
|
2775 |
+ PAVGB(%%mm7, %%mm3) |
|
2776 |
+ PAVGB(%%mm4, %%mm0) |
|
2777 |
+ PAVGB(%%mm5, %%mm1) |
|
2778 |
+ PAVGB(%%mm6, %%mm2) |
|
2779 |
+ PAVGB(%%mm7, %%mm3) |
|
2780 |
+ "movq %%mm0, (%1) \n\t" // R0 |
|
2781 |
+ "movq %%mm1, (%1, %2) \n\t" // R1 |
|
2782 |
+ "movq %%mm2, (%1, %2, 2) \n\t" // R2 |
|
2783 |
+ "movq %%mm3, (%1, %%eax) \n\t" // R3 |
|
2784 |
+ "movq %%mm0, (%0) \n\t" // L0 |
|
2785 |
+ "movq %%mm1, (%0, %2) \n\t" // L1 |
|
2786 |
+ "movq %%mm2, (%0, %2, 2) \n\t" // L2 |
|
2787 |
+ "movq %%mm3, (%0, %%eax) \n\t" // L3 |
|
2788 |
+ |
|
2789 |
+ "movq (%0, %2, 4), %%mm0 \n\t" // L4 |
|
2790 |
+ "movq (%0, %%ebx), %%mm1 \n\t" // L5 |
|
2791 |
+ "movq (%0, %%eax, 2), %%mm2 \n\t" // L6 |
|
2792 |
+ "movq (%0, %%ecx), %%mm3 \n\t" // L7 |
|
2793 |
+ "movq (%1, %2, 4), %%mm4 \n\t" // R4 |
|
2794 |
+ "movq (%1, %%ebx), %%mm5 \n\t" // R5 |
|
2795 |
+ "movq (%1, %%eax, 2), %%mm6 \n\t" // R6 |
|
2796 |
+ "movq (%1, %%ecx), %%mm7 \n\t" // R7 |
|
2797 |
+ PAVGB(%%mm4, %%mm0) |
|
2798 |
+ PAVGB(%%mm5, %%mm1) |
|
2799 |
+ PAVGB(%%mm6, %%mm2) |
|
2800 |
+ PAVGB(%%mm7, %%mm3) |
|
2801 |
+ PAVGB(%%mm4, %%mm0) |
|
2802 |
+ PAVGB(%%mm5, %%mm1) |
|
2803 |
+ PAVGB(%%mm6, %%mm2) |
|
2804 |
+ PAVGB(%%mm7, %%mm3) |
|
2805 |
+ "movq %%mm0, (%1, %2, 4) \n\t" // R4 |
|
2806 |
+ "movq %%mm1, (%1, %%ebx) \n\t" // R5 |
|
2807 |
+ "movq %%mm2, (%1, %%eax, 2) \n\t" // R6 |
|
2808 |
+ "movq %%mm3, (%1, %%ecx) \n\t" // R7 |
|
2809 |
+ "movq %%mm0, (%0, %2, 4) \n\t" // L4 |
|
2810 |
+ "movq %%mm1, (%0, %%ebx) \n\t" // L5 |
|
2811 |
+ "movq %%mm2, (%0, %%eax, 2) \n\t" // L6 |
|
2812 |
+ "movq %%mm3, (%0, %%ecx) \n\t" // L7 |
|
2813 |
+ "jmp 4f \n\t" |
|
2814 |
+ |
|
2815 |
+ "3: \n\t" |
|
2816 |
+ "leal (%%ebx, %2, 2), %%ecx \n\t" // 7*stride |
|
2817 |
+ "movq (%0), %%mm0 \n\t" // L0 |
|
2818 |
+ "movq (%0, %2), %%mm1 \n\t" // L1 |
|
2819 |
+ "movq (%0, %2, 2), %%mm2 \n\t" // L2 |
|
2820 |
+ "movq (%0, %%eax), %%mm3 \n\t" // L3 |
|
2821 |
+ "movq (%1), %%mm4 \n\t" // R0 |
|
2822 |
+ "movq (%1, %2), %%mm5 \n\t" // R1 |
|
2823 |
+ "movq (%1, %2, 2), %%mm6 \n\t" // R2 |
|
2824 |
+ "movq (%1, %%eax), %%mm7 \n\t" // R3 |
|
2825 |
+ PAVGB(%%mm4, %%mm0) |
|
2826 |
+ PAVGB(%%mm5, %%mm1) |
|
2827 |
+ PAVGB(%%mm6, %%mm2) |
|
2828 |
+ PAVGB(%%mm7, %%mm3) |
|
2829 |
+ PAVGB(%%mm4, %%mm0) |
|
2830 |
+ PAVGB(%%mm5, %%mm1) |
|
2831 |
+ PAVGB(%%mm6, %%mm2) |
|
2832 |
+ PAVGB(%%mm7, %%mm3) |
|
2833 |
+ PAVGB(%%mm4, %%mm0) |
|
2834 |
+ PAVGB(%%mm5, %%mm1) |
|
2835 |
+ PAVGB(%%mm6, %%mm2) |
|
2836 |
+ PAVGB(%%mm7, %%mm3) |
|
2837 |
+ "movq %%mm0, (%1) \n\t" // R0 |
|
2838 |
+ "movq %%mm1, (%1, %2) \n\t" // R1 |
|
2839 |
+ "movq %%mm2, (%1, %2, 2) \n\t" // R2 |
|
2840 |
+ "movq %%mm3, (%1, %%eax) \n\t" // R3 |
|
2841 |
+ "movq %%mm0, (%0) \n\t" // L0 |
|
2842 |
+ "movq %%mm1, (%0, %2) \n\t" // L1 |
|
2843 |
+ "movq %%mm2, (%0, %2, 2) \n\t" // L2 |
|
2844 |
+ "movq %%mm3, (%0, %%eax) \n\t" // L3 |
|
2845 |
+ |
|
2846 |
+ "movq (%0, %2, 4), %%mm0 \n\t" // L4 |
|
2847 |
+ "movq (%0, %%ebx), %%mm1 \n\t" // L5 |
|
2848 |
+ "movq (%0, %%eax, 2), %%mm2 \n\t" // L6 |
|
2849 |
+ "movq (%0, %%ecx), %%mm3 \n\t" // L7 |
|
2850 |
+ "movq (%1, %2, 4), %%mm4 \n\t" // R4 |
|
2851 |
+ "movq (%1, %%ebx), %%mm5 \n\t" // R5 |
|
2852 |
+ "movq (%1, %%eax, 2), %%mm6 \n\t" // R6 |
|
2853 |
+ "movq (%1, %%ecx), %%mm7 \n\t" // R7 |
|
2854 |
+ PAVGB(%%mm4, %%mm0) |
|
2855 |
+ PAVGB(%%mm5, %%mm1) |
|
2856 |
+ PAVGB(%%mm6, %%mm2) |
|
2857 |
+ PAVGB(%%mm7, %%mm3) |
|
2858 |
+ PAVGB(%%mm4, %%mm0) |
|
2859 |
+ PAVGB(%%mm5, %%mm1) |
|
2860 |
+ PAVGB(%%mm6, %%mm2) |
|
2861 |
+ PAVGB(%%mm7, %%mm3) |
|
2862 |
+ PAVGB(%%mm4, %%mm0) |
|
2863 |
+ PAVGB(%%mm5, %%mm1) |
|
2864 |
+ PAVGB(%%mm6, %%mm2) |
|
2865 |
+ PAVGB(%%mm7, %%mm3) |
|
2866 |
+ "movq %%mm0, (%1, %2, 4) \n\t" // R4 |
|
2867 |
+ "movq %%mm1, (%1, %%ebx) \n\t" // R5 |
|
2868 |
+ "movq %%mm2, (%1, %%eax, 2) \n\t" // R6 |
|
2869 |
+ "movq %%mm3, (%1, %%ecx) \n\t" // R7 |
|
2870 |
+ "movq %%mm0, (%0, %2, 4) \n\t" // L4 |
|
2871 |
+ "movq %%mm1, (%0, %%ebx) \n\t" // L5 |
|
2872 |
+ "movq %%mm2, (%0, %%eax, 2) \n\t" // L6 |
|
2873 |
+ "movq %%mm3, (%0, %%ecx) \n\t" // L7 |
|
2874 |
+ |
|
2875 |
+ "4: \n\t" |
|
2876 |
+ |
|
2877 |
+ :: "r" (src), "r" (tempBlured), "r"(stride), |
|
2878 |
+ "m"(maxNoise[0]), "m"(maxNoise[1]), "m"(maxNoise[2]) |
|
2879 |
+ : "%eax", "%ebx", "%ecx", "memory" |
|
2880 |
+ ); |
|
2881 |
+//printf("%d\n", test); |
|
2882 |
+#else |
|
2599 | 2883 |
int y; |
2600 | 2884 |
int d=0; |
2601 | 2885 |
int sysd=0; |
... | ... |
@@ -2608,7 +2893,10 @@ static void inline tempNoiseReducer(uint8_t *src, int stride, |
2608 | 2608 |
int ref= tempBlured[ x + y*stride ]; |
2609 | 2609 |
int cur= src[ x + y*stride ]; |
2610 | 2610 |
int d1=ref - cur; |
2611 |
- d+= ABS(d1); //d1*d1; |
|
2611 |
+// if(x==0 || x==7) d1+= d1>>1; |
|
2612 |
+// if(y==0 || y==7) d1+= d1>>1; |
|
2613 |
+// d+= ABS(d1); |
|
2614 |
+ d+= d1*d1; |
|
2612 | 2615 |
sysd+= d1; |
2613 | 2616 |
} |
2614 | 2617 |
} |
... | ... |
@@ -2682,6 +2970,7 @@ Switch between |
2682 | 2682 |
} |
2683 | 2683 |
} |
2684 | 2684 |
} |
2685 |
+#endif |
|
2685 | 2686 |
} |
2686 | 2687 |
|
2687 | 2688 |
#ifdef HAVE_ODIVX_POSTPROCESS |
... | ... |
@@ -2914,9 +3203,9 @@ void postprocess(unsigned char * src[], int src_stride, |
2914 | 2914 |
ppMode.lumMode= mode; |
2915 | 2915 |
mode= ((mode&0xFF)>>4) | (mode&0xFFFFFF00); |
2916 | 2916 |
ppMode.chromMode= mode; |
2917 |
- ppMode.maxTmpNoise[0]= 150; |
|
2918 |
- ppMode.maxTmpNoise[1]= 200; |
|
2919 |
- ppMode.maxTmpNoise[2]= 400; |
|
2917 |
+ ppMode.maxTmpNoise[0]= 700; |
|
2918 |
+ ppMode.maxTmpNoise[1]= 1500; |
|
2919 |
+ ppMode.maxTmpNoise[2]= 3000; |
|
2920 | 2920 |
|
2921 | 2921 |
#ifdef HAVE_ODIVX_POSTPROCESS |
2922 | 2922 |
// Note: I could make this shit outside of this file, but it would mean one |
... | ... |
@@ -35,7 +35,7 @@ LinIpolDeinterlace e E E* |
35 | 35 |
CubicIpolDeinterlace a e e* |
36 | 36 |
LinBlendDeinterlace e E E* |
37 | 37 |
MedianDeinterlace# Ec Ec |
38 |
-TempDeNoiser# a |
|
38 |
+TempDeNoiser# E e e |
|
39 | 39 |
|
40 | 40 |
* I don't have a 3DNow! CPU -> it's untested, but no one said it doesn't work, so it seems to work |
41 | 41 |
# more or less self-invented filters, so the exactness isn't too meaningful |
... | ... |
@@ -61,6 +61,7 @@ split this huge file |
61 | 61 |
border remover |
62 | 62 |
optimize c versions |
63 | 63 |
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks |
64 |
+smart blur |
|
64 | 65 |
... |
65 | 66 |
|
66 | 67 |
Notes: |
... | ... |
@@ -2592,10 +2593,294 @@ static inline void transpose2(uint8_t *dst, int dstStride, uint8_t *src) |
2592 | 2592 |
); |
2593 | 2593 |
} |
2594 | 2594 |
#endif |
2595 |
+//static int test=0; |
|
2595 | 2596 |
|
2596 | 2597 |
static void inline tempNoiseReducer(uint8_t *src, int stride, |
2597 | 2598 |
uint8_t *tempBlured, int *maxNoise) |
2598 | 2599 |
{ |
2600 |
+#define FAST_L2_DIFF |
|
2601 |
+//#define L1_DIFF //u should change the thresholds too if u try that one |
|
2602 |
+#if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
|
2603 |
+ asm volatile( |
|
2604 |
+ "leal (%2, %2, 2), %%eax \n\t" // 3*stride |
|
2605 |
+ "leal (%2, %2, 4), %%ebx \n\t" // 5*stride |
|
2606 |
+ "leal (%%ebx, %2, 2), %%ecx \n\t" // 7*stride |
|
2607 |
+// 0 1 2 3 4 5 6 7 8 9 |
|
2608 |
+// %x %x+%2 %x+2%2 %x+eax %x+4%2 %x+ebx %x+2eax %x+ecx %x+8%2 |
|
2609 |
+//FIXME reorder? |
|
2610 |
+#ifdef L1_DIFF //needs mmx2 |
|
2611 |
+ "movq (%0), %%mm0 \n\t" // L0 |
|
2612 |
+ "psadbw (%1), %%mm0 \n\t" // |L0-R0| |
|
2613 |
+ "movq (%0, %2), %%mm1 \n\t" // L1 |
|
2614 |
+ "psadbw (%1, %2), %%mm1 \n\t" // |L1-R1| |
|
2615 |
+ "movq (%0, %2, 2), %%mm2 \n\t" // L2 |
|
2616 |
+ "psadbw (%1, %2, 2), %%mm2 \n\t" // |L2-R2| |
|
2617 |
+ "movq (%0, %%eax), %%mm3 \n\t" // L3 |
|
2618 |
+ "psadbw (%1, %%eax), %%mm3 \n\t" // |L3-R3| |
|
2619 |
+ |
|
2620 |
+ "movq (%0, %2, 4), %%mm4 \n\t" // L4 |
|
2621 |
+ "paddw %%mm1, %%mm0 \n\t" |
|
2622 |
+ "psadbw (%1, %2, 4), %%mm4 \n\t" // |L4-R4| |
|
2623 |
+ "movq (%0, %%ebx), %%mm5 \n\t" // L5 |
|
2624 |
+ "paddw %%mm2, %%mm0 \n\t" |
|
2625 |
+ "psadbw (%1, %%ebx), %%mm5 \n\t" // |L5-R5| |
|
2626 |
+ "movq (%0, %%eax, 2), %%mm6 \n\t" // L6 |
|
2627 |
+ "paddw %%mm3, %%mm0 \n\t" |
|
2628 |
+ "psadbw (%1, %%eax, 2), %%mm6 \n\t" // |L6-R6| |
|
2629 |
+ "movq (%0, %%ecx), %%mm7 \n\t" // L7 |
|
2630 |
+ "paddw %%mm4, %%mm0 \n\t" |
|
2631 |
+ "psadbw (%1, %%ecx), %%mm7 \n\t" // |L7-R7| |
|
2632 |
+ "paddw %%mm5, %%mm6 \n\t" |
|
2633 |
+ "paddw %%mm7, %%mm6 \n\t" |
|
2634 |
+ "paddw %%mm6, %%mm0 \n\t" |
|
2635 |
+#elif defined (FAST_L2_DIFF) |
|
2636 |
+ "pcmpeqb %%mm7, %%mm7 \n\t" |
|
2637 |
+ "movq b80, %%mm6 \n\t" |
|
2638 |
+ "pxor %%mm0, %%mm0 \n\t" |
|
2639 |
+#define L2_DIFF_CORE(a, b)\ |
|
2640 |
+ "movq " #a ", %%mm5 \n\t"\ |
|
2641 |
+ "movq " #b ", %%mm2 \n\t"\ |
|
2642 |
+ "pxor %%mm7, %%mm2 \n\t"\ |
|
2643 |
+ PAVGB(%%mm2, %%mm5)\ |
|
2644 |
+ "paddb %%mm6, %%mm5 \n\t"\ |
|
2645 |
+ "movq %%mm5, %%mm2 \n\t"\ |
|
2646 |
+ "psllw $8, %%mm5 \n\t"\ |
|
2647 |
+ "pmaddwd %%mm5, %%mm5 \n\t"\ |
|
2648 |
+ "pmaddwd %%mm2, %%mm2 \n\t"\ |
|
2649 |
+ "paddd %%mm2, %%mm5 \n\t"\ |
|
2650 |
+ "psrld $14, %%mm5 \n\t"\ |
|
2651 |
+ "paddd %%mm5, %%mm0 \n\t" |
|
2652 |
+ |
|
2653 |
+L2_DIFF_CORE((%0), (%1)) |
|
2654 |
+L2_DIFF_CORE((%0, %2), (%1, %2)) |
|
2655 |
+L2_DIFF_CORE((%0, %2, 2), (%1, %2, 2)) |
|
2656 |
+L2_DIFF_CORE((%0, %%eax), (%1, %%eax)) |
|
2657 |
+L2_DIFF_CORE((%0, %2, 4), (%1, %2, 4)) |
|
2658 |
+L2_DIFF_CORE((%0, %%ebx), (%1, %%ebx)) |
|
2659 |
+L2_DIFF_CORE((%0, %%eax,2), (%1, %%eax,2)) |
|
2660 |
+L2_DIFF_CORE((%0, %%ecx), (%1, %%ecx)) |
|
2661 |
+ |
|
2662 |
+#else |
|
2663 |
+ "pxor %%mm7, %%mm7 \n\t" |
|
2664 |
+ "pxor %%mm0, %%mm0 \n\t" |
|
2665 |
+#define L2_DIFF_CORE(a, b)\ |
|
2666 |
+ "movq " #a ", %%mm5 \n\t"\ |
|
2667 |
+ "movq " #b ", %%mm2 \n\t"\ |
|
2668 |
+ "movq %%mm5, %%mm1 \n\t"\ |
|
2669 |
+ "movq %%mm2, %%mm3 \n\t"\ |
|
2670 |
+ "punpcklbw %%mm7, %%mm5 \n\t"\ |
|
2671 |
+ "punpckhbw %%mm7, %%mm1 \n\t"\ |
|
2672 |
+ "punpcklbw %%mm7, %%mm2 \n\t"\ |
|
2673 |
+ "punpckhbw %%mm7, %%mm3 \n\t"\ |
|
2674 |
+ "psubw %%mm2, %%mm5 \n\t"\ |
|
2675 |
+ "psubw %%mm3, %%mm1 \n\t"\ |
|
2676 |
+ "pmaddwd %%mm5, %%mm5 \n\t"\ |
|
2677 |
+ "pmaddwd %%mm1, %%mm1 \n\t"\ |
|
2678 |
+ "paddd %%mm1, %%mm5 \n\t"\ |
|
2679 |
+ "paddd %%mm5, %%mm0 \n\t" |
|
2680 |
+ |
|
2681 |
+L2_DIFF_CORE((%0), (%1)) |
|
2682 |
+L2_DIFF_CORE((%0, %2), (%1, %2)) |
|
2683 |
+L2_DIFF_CORE((%0, %2, 2), (%1, %2, 2)) |
|
2684 |
+L2_DIFF_CORE((%0, %%eax), (%1, %%eax)) |
|
2685 |
+L2_DIFF_CORE((%0, %2, 4), (%1, %2, 4)) |
|
2686 |
+L2_DIFF_CORE((%0, %%ebx), (%1, %%ebx)) |
|
2687 |
+L2_DIFF_CORE((%0, %%eax,2), (%1, %%eax,2)) |
|
2688 |
+L2_DIFF_CORE((%0, %%ecx), (%1, %%ecx)) |
|
2689 |
+ |
|
2690 |
+#endif |
|
2691 |
+ |
|
2692 |
+ "movq %%mm0, %%mm4 \n\t" |
|
2693 |
+ "psrlq $32, %%mm0 \n\t" |
|
2694 |
+ "paddd %%mm0, %%mm4 \n\t" |
|
2695 |
+ "movd %%mm4, %%ecx \n\t" |
|
2696 |
+// "movl %3, %%ecx \n\t" |
|
2697 |
+// "movl %%ecx, test \n\t" |
|
2698 |
+// "jmp 4f \n\t" |
|
2699 |
+ "cmpl %4, %%ecx \n\t" |
|
2700 |
+ " jb 2f \n\t" |
|
2701 |
+ "cmpl %5, %%ecx \n\t" |
|
2702 |
+ " jb 1f \n\t" |
|
2703 |
+ |
|
2704 |
+ "leal (%%ebx, %2, 2), %%ecx \n\t" // 7*stride |
|
2705 |
+ "movq (%0), %%mm0 \n\t" // L0 |
|
2706 |
+ "movq (%0, %2), %%mm1 \n\t" // L1 |
|
2707 |
+ "movq (%0, %2, 2), %%mm2 \n\t" // L2 |
|
2708 |
+ "movq (%0, %%eax), %%mm3 \n\t" // L3 |
|
2709 |
+ "movq (%0, %2, 4), %%mm4 \n\t" // L4 |
|
2710 |
+ "movq (%0, %%ebx), %%mm5 \n\t" // L5 |
|
2711 |
+ "movq (%0, %%eax, 2), %%mm6 \n\t" // L6 |
|
2712 |
+ "movq (%0, %%ecx), %%mm7 \n\t" // L7 |
|
2713 |
+ "movq %%mm0, (%1) \n\t" // L0 |
|
2714 |
+ "movq %%mm1, (%1, %2) \n\t" // L1 |
|
2715 |
+ "movq %%mm2, (%1, %2, 2) \n\t" // L2 |
|
2716 |
+ "movq %%mm3, (%1, %%eax) \n\t" // L3 |
|
2717 |
+ "movq %%mm4, (%1, %2, 4) \n\t" // L4 |
|
2718 |
+ "movq %%mm5, (%1, %%ebx) \n\t" // L5 |
|
2719 |
+ "movq %%mm6, (%1, %%eax, 2) \n\t" // L6 |
|
2720 |
+ "movq %%mm7, (%1, %%ecx) \n\t" // L7 |
|
2721 |
+ "jmp 4f \n\t" |
|
2722 |
+ |
|
2723 |
+ "1: \n\t" |
|
2724 |
+ "leal (%%ebx, %2, 2), %%ecx \n\t" // 7*stride |
|
2725 |
+ "movq (%0), %%mm0 \n\t" // L0 |
|
2726 |
+ "pavgb (%1), %%mm0 \n\t" // L0 |
|
2727 |
+ "movq (%0, %2), %%mm1 \n\t" // L1 |
|
2728 |
+ "pavgb (%1, %2), %%mm1 \n\t" // L1 |
|
2729 |
+ "movq (%0, %2, 2), %%mm2 \n\t" // L2 |
|
2730 |
+ "pavgb (%1, %2, 2), %%mm2 \n\t" // L2 |
|
2731 |
+ "movq (%0, %%eax), %%mm3 \n\t" // L3 |
|
2732 |
+ "pavgb (%1, %%eax), %%mm3 \n\t" // L3 |
|
2733 |
+ "movq (%0, %2, 4), %%mm4 \n\t" // L4 |
|
2734 |
+ "pavgb (%1, %2, 4), %%mm4 \n\t" // L4 |
|
2735 |
+ "movq (%0, %%ebx), %%mm5 \n\t" // L5 |
|
2736 |
+ "pavgb (%1, %%ebx), %%mm5 \n\t" // L5 |
|
2737 |
+ "movq (%0, %%eax, 2), %%mm6 \n\t" // L6 |
|
2738 |
+ "pavgb (%1, %%eax, 2), %%mm6 \n\t" // L6 |
|
2739 |
+ "movq (%0, %%ecx), %%mm7 \n\t" // L7 |
|
2740 |
+ "pavgb (%1, %%ecx), %%mm7 \n\t" // L7 |
|
2741 |
+ "movq %%mm0, (%1) \n\t" // R0 |
|
2742 |
+ "movq %%mm1, (%1, %2) \n\t" // R1 |
|
2743 |
+ "movq %%mm2, (%1, %2, 2) \n\t" // R2 |
|
2744 |
+ "movq %%mm3, (%1, %%eax) \n\t" // R3 |
|
2745 |
+ "movq %%mm4, (%1, %2, 4) \n\t" // R4 |
|
2746 |
+ "movq %%mm5, (%1, %%ebx) \n\t" // R5 |
|
2747 |
+ "movq %%mm6, (%1, %%eax, 2) \n\t" // R6 |
|
2748 |
+ "movq %%mm7, (%1, %%ecx) \n\t" // R7 |
|
2749 |
+ "movq %%mm0, (%0) \n\t" // L0 |
|
2750 |
+ "movq %%mm1, (%0, %2) \n\t" // L1 |
|
2751 |
+ "movq %%mm2, (%0, %2, 2) \n\t" // L2 |
|
2752 |
+ "movq %%mm3, (%0, %%eax) \n\t" // L3 |
|
2753 |
+ "movq %%mm4, (%0, %2, 4) \n\t" // L4 |
|
2754 |
+ "movq %%mm5, (%0, %%ebx) \n\t" // L5 |
|
2755 |
+ "movq %%mm6, (%0, %%eax, 2) \n\t" // L6 |
|
2756 |
+ "movq %%mm7, (%0, %%ecx) \n\t" // L7 |
|
2757 |
+ "jmp 4f \n\t" |
|
2758 |
+ |
|
2759 |
+ "2: \n\t" |
|
2760 |
+ "cmpl %3, %%ecx \n\t" |
|
2761 |
+ " jb 3f \n\t" |
|
2762 |
+ |
|
2763 |
+ "leal (%%ebx, %2, 2), %%ecx \n\t" // 7*stride |
|
2764 |
+ "movq (%0), %%mm0 \n\t" // L0 |
|
2765 |
+ "movq (%0, %2), %%mm1 \n\t" // L1 |
|
2766 |
+ "movq (%0, %2, 2), %%mm2 \n\t" // L2 |
|
2767 |
+ "movq (%0, %%eax), %%mm3 \n\t" // L3 |
|
2768 |
+ "movq (%1), %%mm4 \n\t" // R0 |
|
2769 |
+ "movq (%1, %2), %%mm5 \n\t" // R1 |
|
2770 |
+ "movq (%1, %2, 2), %%mm6 \n\t" // R2 |
|
2771 |
+ "movq (%1, %%eax), %%mm7 \n\t" // R3 |
|
2772 |
+ PAVGB(%%mm4, %%mm0) |
|
2773 |
+ PAVGB(%%mm5, %%mm1) |
|
2774 |
+ PAVGB(%%mm6, %%mm2) |
|
2775 |
+ PAVGB(%%mm7, %%mm3) |
|
2776 |
+ PAVGB(%%mm4, %%mm0) |
|
2777 |
+ PAVGB(%%mm5, %%mm1) |
|
2778 |
+ PAVGB(%%mm6, %%mm2) |
|
2779 |
+ PAVGB(%%mm7, %%mm3) |
|
2780 |
+ "movq %%mm0, (%1) \n\t" // R0 |
|
2781 |
+ "movq %%mm1, (%1, %2) \n\t" // R1 |
|
2782 |
+ "movq %%mm2, (%1, %2, 2) \n\t" // R2 |
|
2783 |
+ "movq %%mm3, (%1, %%eax) \n\t" // R3 |
|
2784 |
+ "movq %%mm0, (%0) \n\t" // L0 |
|
2785 |
+ "movq %%mm1, (%0, %2) \n\t" // L1 |
|
2786 |
+ "movq %%mm2, (%0, %2, 2) \n\t" // L2 |
|
2787 |
+ "movq %%mm3, (%0, %%eax) \n\t" // L3 |
|
2788 |
+ |
|
2789 |
+ "movq (%0, %2, 4), %%mm0 \n\t" // L4 |
|
2790 |
+ "movq (%0, %%ebx), %%mm1 \n\t" // L5 |
|
2791 |
+ "movq (%0, %%eax, 2), %%mm2 \n\t" // L6 |
|
2792 |
+ "movq (%0, %%ecx), %%mm3 \n\t" // L7 |
|
2793 |
+ "movq (%1, %2, 4), %%mm4 \n\t" // R4 |
|
2794 |
+ "movq (%1, %%ebx), %%mm5 \n\t" // R5 |
|
2795 |
+ "movq (%1, %%eax, 2), %%mm6 \n\t" // R6 |
|
2796 |
+ "movq (%1, %%ecx), %%mm7 \n\t" // R7 |
|
2797 |
+ PAVGB(%%mm4, %%mm0) |
|
2798 |
+ PAVGB(%%mm5, %%mm1) |
|
2799 |
+ PAVGB(%%mm6, %%mm2) |
|
2800 |
+ PAVGB(%%mm7, %%mm3) |
|
2801 |
+ PAVGB(%%mm4, %%mm0) |
|
2802 |
+ PAVGB(%%mm5, %%mm1) |
|
2803 |
+ PAVGB(%%mm6, %%mm2) |
|
2804 |
+ PAVGB(%%mm7, %%mm3) |
|
2805 |
+ "movq %%mm0, (%1, %2, 4) \n\t" // R4 |
|
2806 |
+ "movq %%mm1, (%1, %%ebx) \n\t" // R5 |
|
2807 |
+ "movq %%mm2, (%1, %%eax, 2) \n\t" // R6 |
|
2808 |
+ "movq %%mm3, (%1, %%ecx) \n\t" // R7 |
|
2809 |
+ "movq %%mm0, (%0, %2, 4) \n\t" // L4 |
|
2810 |
+ "movq %%mm1, (%0, %%ebx) \n\t" // L5 |
|
2811 |
+ "movq %%mm2, (%0, %%eax, 2) \n\t" // L6 |
|
2812 |
+ "movq %%mm3, (%0, %%ecx) \n\t" // L7 |
|
2813 |
+ "jmp 4f \n\t" |
|
2814 |
+ |
|
2815 |
+ "3: \n\t" |
|
2816 |
+ "leal (%%ebx, %2, 2), %%ecx \n\t" // 7*stride |
|
2817 |
+ "movq (%0), %%mm0 \n\t" // L0 |
|
2818 |
+ "movq (%0, %2), %%mm1 \n\t" // L1 |
|
2819 |
+ "movq (%0, %2, 2), %%mm2 \n\t" // L2 |
|
2820 |
+ "movq (%0, %%eax), %%mm3 \n\t" // L3 |
|
2821 |
+ "movq (%1), %%mm4 \n\t" // R0 |
|
2822 |
+ "movq (%1, %2), %%mm5 \n\t" // R1 |
|
2823 |
+ "movq (%1, %2, 2), %%mm6 \n\t" // R2 |
|
2824 |
+ "movq (%1, %%eax), %%mm7 \n\t" // R3 |
|
2825 |
+ PAVGB(%%mm4, %%mm0) |
|
2826 |
+ PAVGB(%%mm5, %%mm1) |
|
2827 |
+ PAVGB(%%mm6, %%mm2) |
|
2828 |
+ PAVGB(%%mm7, %%mm3) |
|
2829 |
+ PAVGB(%%mm4, %%mm0) |
|
2830 |
+ PAVGB(%%mm5, %%mm1) |
|
2831 |
+ PAVGB(%%mm6, %%mm2) |
|
2832 |
+ PAVGB(%%mm7, %%mm3) |
|
2833 |
+ PAVGB(%%mm4, %%mm0) |
|
2834 |
+ PAVGB(%%mm5, %%mm1) |
|
2835 |
+ PAVGB(%%mm6, %%mm2) |
|
2836 |
+ PAVGB(%%mm7, %%mm3) |
|
2837 |
+ "movq %%mm0, (%1) \n\t" // R0 |
|
2838 |
+ "movq %%mm1, (%1, %2) \n\t" // R1 |
|
2839 |
+ "movq %%mm2, (%1, %2, 2) \n\t" // R2 |
|
2840 |
+ "movq %%mm3, (%1, %%eax) \n\t" // R3 |
|
2841 |
+ "movq %%mm0, (%0) \n\t" // L0 |
|
2842 |
+ "movq %%mm1, (%0, %2) \n\t" // L1 |
|
2843 |
+ "movq %%mm2, (%0, %2, 2) \n\t" // L2 |
|
2844 |
+ "movq %%mm3, (%0, %%eax) \n\t" // L3 |
|
2845 |
+ |
|
2846 |
+ "movq (%0, %2, 4), %%mm0 \n\t" // L4 |
|
2847 |
+ "movq (%0, %%ebx), %%mm1 \n\t" // L5 |
|
2848 |
+ "movq (%0, %%eax, 2), %%mm2 \n\t" // L6 |
|
2849 |
+ "movq (%0, %%ecx), %%mm3 \n\t" // L7 |
|
2850 |
+ "movq (%1, %2, 4), %%mm4 \n\t" // R4 |
|
2851 |
+ "movq (%1, %%ebx), %%mm5 \n\t" // R5 |
|
2852 |
+ "movq (%1, %%eax, 2), %%mm6 \n\t" // R6 |
|
2853 |
+ "movq (%1, %%ecx), %%mm7 \n\t" // R7 |
|
2854 |
+ PAVGB(%%mm4, %%mm0) |
|
2855 |
+ PAVGB(%%mm5, %%mm1) |
|
2856 |
+ PAVGB(%%mm6, %%mm2) |
|
2857 |
+ PAVGB(%%mm7, %%mm3) |
|
2858 |
+ PAVGB(%%mm4, %%mm0) |
|
2859 |
+ PAVGB(%%mm5, %%mm1) |
|
2860 |
+ PAVGB(%%mm6, %%mm2) |
|
2861 |
+ PAVGB(%%mm7, %%mm3) |
|
2862 |
+ PAVGB(%%mm4, %%mm0) |
|
2863 |
+ PAVGB(%%mm5, %%mm1) |
|
2864 |
+ PAVGB(%%mm6, %%mm2) |
|
2865 |
+ PAVGB(%%mm7, %%mm3) |
|
2866 |
+ "movq %%mm0, (%1, %2, 4) \n\t" // R4 |
|
2867 |
+ "movq %%mm1, (%1, %%ebx) \n\t" // R5 |
|
2868 |
+ "movq %%mm2, (%1, %%eax, 2) \n\t" // R6 |
|
2869 |
+ "movq %%mm3, (%1, %%ecx) \n\t" // R7 |
|
2870 |
+ "movq %%mm0, (%0, %2, 4) \n\t" // L4 |
|
2871 |
+ "movq %%mm1, (%0, %%ebx) \n\t" // L5 |
|
2872 |
+ "movq %%mm2, (%0, %%eax, 2) \n\t" // L6 |
|
2873 |
+ "movq %%mm3, (%0, %%ecx) \n\t" // L7 |
|
2874 |
+ |
|
2875 |
+ "4: \n\t" |
|
2876 |
+ |
|
2877 |
+ :: "r" (src), "r" (tempBlured), "r"(stride), |
|
2878 |
+ "m"(maxNoise[0]), "m"(maxNoise[1]), "m"(maxNoise[2]) |
|
2879 |
+ : "%eax", "%ebx", "%ecx", "memory" |
|
2880 |
+ ); |
|
2881 |
+//printf("%d\n", test); |
|
2882 |
+#else |
|
2599 | 2883 |
int y; |
2600 | 2884 |
int d=0; |
2601 | 2885 |
int sysd=0; |
... | ... |
@@ -2608,7 +2893,10 @@ static void inline tempNoiseReducer(uint8_t *src, int stride, |
2608 | 2608 |
int ref= tempBlured[ x + y*stride ]; |
2609 | 2609 |
int cur= src[ x + y*stride ]; |
2610 | 2610 |
int d1=ref - cur; |
2611 |
- d+= ABS(d1); //d1*d1; |
|
2611 |
+// if(x==0 || x==7) d1+= d1>>1; |
|
2612 |
+// if(y==0 || y==7) d1+= d1>>1; |
|
2613 |
+// d+= ABS(d1); |
|
2614 |
+ d+= d1*d1; |
|
2612 | 2615 |
sysd+= d1; |
2613 | 2616 |
} |
2614 | 2617 |
} |
... | ... |
@@ -2682,6 +2970,7 @@ Switch between |
2682 | 2682 |
} |
2683 | 2683 |
} |
2684 | 2684 |
} |
2685 |
+#endif |
|
2685 | 2686 |
} |
2686 | 2687 |
|
2687 | 2688 |
#ifdef HAVE_ODIVX_POSTPROCESS |
... | ... |
@@ -2914,9 +3203,9 @@ void postprocess(unsigned char * src[], int src_stride, |
2914 | 2914 |
ppMode.lumMode= mode; |
2915 | 2915 |
mode= ((mode&0xFF)>>4) | (mode&0xFFFFFF00); |
2916 | 2916 |
ppMode.chromMode= mode; |
2917 |
- ppMode.maxTmpNoise[0]= 150; |
|
2918 |
- ppMode.maxTmpNoise[1]= 200; |
|
2919 |
- ppMode.maxTmpNoise[2]= 400; |
|
2917 |
+ ppMode.maxTmpNoise[0]= 700; |
|
2918 |
+ ppMode.maxTmpNoise[1]= 1500; |
|
2919 |
+ ppMode.maxTmpNoise[2]= 3000; |
|
2920 | 2920 |
|
2921 | 2921 |
#ifdef HAVE_ODIVX_POSTPROCESS |
2922 | 2922 |
// Note: I could make this shit outside of this file, but it would mean one |