GitList

Browse code

Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win on ev6.

Originally committed as revision 979 to svn://svn.ffmpeg.org/ffmpeg/trunk

Falk Hüffner authored on 2002/09/30 00:14:28
Showing 4 changed files

libavcodec/Makefile index 0d54b0c..d17c368 100644
libavcodec/alpha/dsputil_alpha.c index fef86fe..3bf2290 100644
libavcodec/alpha/motion_est_alpha.c index b0968d1..804e1d2 100644
libavcodec/alpha/motion_est_mvi_asm.S index 0000000..0042e7e

@@ -68,7 +68,7 @@ endif
                      # alpha specific stuff
                      ifeq ($(TARGET_ARCH_ALPHA),yes)
                      OBJS += alpha/dsputil_alpha.o alpha/mpegvideo_alpha.o alpha/motion_est_alpha.o
                     -ASM_OBJS += alpha/dsputil_alpha_asm.o
                     +ASM_OBJS += alpha/dsputil_alpha_asm.o alpha/motion_est_mvi_asm.o
                      CFLAGS += -fforce-addr -freduce-all-givs
                      endif

libavcodec/alpha/dsputil_alpha.c

History View file @ f9bb4bd

@@ -34,7 +34,7 @@ void get_pixels_mvi(DCTELEM *restrict block,
                      void diff_pixels_mvi(DCTELEM *block, const uint8_t *s1, const uint8_t *s2,
                                           int stride);
                      int pix_abs8x8_mvi(uint8_t *pix1, uint8_t *pix2, int line_size);
                     -int pix_abs16x16_mvi(uint8_t *pix1, uint8_t *pix2, int line_size);
                     +int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size);
                      int pix_abs16x16_x2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size);
                      int pix_abs16x16_y2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size);
                      int pix_abs16x16_xy2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size);
@@ -335,7 +335,7 @@ void dsputil_init_alpha(void)
                              get_pixels       = get_pixels_mvi;
                              diff_pixels      = diff_pixels_mvi;
                              pix_abs8x8       = pix_abs8x8_mvi;
                     -        pix_abs16x16     = pix_abs16x16_mvi;
                     +        pix_abs16x16     = pix_abs16x16_mvi_asm;
                              pix_abs16x16_x2  = pix_abs16x16_x2_mvi;
                              pix_abs16x16_y2  = pix_abs16x16_y2_mvi;
                              pix_abs16x16_xy2 = pix_abs16x16_xy2_mvi;

libavcodec/alpha/motion_est_alpha.c

History View file @ f9bb4bd

@@ -117,6 +117,7 @@ int pix_abs8x8_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
     return result;
 }
 
+#if 0				/* now done in assembly */
 int pix_abs16x16_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
 {
     int result = 0;
@@ -157,6 +158,7 @@ int pix_abs16x16_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
 
     return result;
 }
+#endif
 
 int pix_abs16x16_x2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
 {

libavcodec/alpha/motion_est_mvi_asm.S

History View file @ f9bb4bd

                     new file mode 100644
@@ -0,0 +1,186 @@
                     +/*
                     + * Alpha optimized DSP utils
                     + * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
                     + *
                     + * This program is free software; you can redistribute it and/or modify
                     + * it under the terms of the GNU General Public License as published by
                     + * the Free Software Foundation; either version 2 of the License, or
                     + * (at your option) any later version.
                     + *
                     + * This program is distributed in the hope that it will be useful,
                     + * but WITHOUT ANY WARRANTY; without even the implied warranty of
                     + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
                     + * GNU General Public License for more details.
                     + *
                     + * You should have received a copy of the GNU General Public License
                     + * along with this program; if not, write to the Free Software
                     + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
                     + */
+                    +
                     +#include "regdef.h"
                     +#ifdef HAVE_AV_CONFIG_H
                     +#include "config.h"
                     +#endif
+                    +
                     +/* Some nicer register names: extra temporaries beyond t0-t9.
                     + * td is AT, the assembler temporary, which is why ".set noat" is
                     + * issued below before it is used as an ordinary register.  */
                     +#define ta t10
                     +#define tb t11
                     +#define tc t12
                     +#define td AT
                     +/* Danger: these overlap with the argument list and the return value.
                     + * In pix_abs16x16_mvi_asm (three arguments, a0-a2) te and tf are used
                     + * as load targets in the aligned loop.  tg is a3, which that routine
                     + * uses as its row counter, and th is v0, its running sum and return
                     + * value; neither is referenced as a temporary in the code below.  */
                     +#define te a5
                     +#define tf a4
                     +#define tg a3
                     +#define th v0
+                    +
                     +        .set noat
                     +        .set noreorder
                     +        .arch pca56
                     +        .text
+                    +
                     +/*****************************************************************************
                     + * int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size)
                     + *
                     + * Accumulates the perr (per-byte absolute difference sum) of two
                     + * blocks of 16 rows by 16 bytes and returns the total in v0.
                     + *
                     + *   a0 = pix1       always read with ldq, so it is expected to be
                     + *                   8-byte aligned
                     + *   a1 = pix2       may be unaligned; (a1 & 7) is tested once on entry
                     + *                   to choose between $unaligned and $aligned
                     + *   a2 = line_size  byte step added to a0/a1 after every row.  Both
                     + *                   loops rely on it being a multiple of 8: the
                     + *                   alignment test is not repeated per row, and the
                     + *                   unaligned loop takes its extract shift count from
                     + *                   a1 after a1 has already been advanced.
                     + *   a3 = rows left  (loaded with 16 on entry; not an argument)
                     + *
                     + * This code is written with a pca56 in mind. For ev6, one should
                     + * really take the increased latency of 3 cycles for MVI instructions
                     + * into account.
                     + *
                     + * It is important to keep the loading and first use of a register as
                     + * far apart as possible, because if a register is accessed before it
                     + * has been fetched from memory, the CPU will stall.
                     + */
                     +        .align 4
                     +        .globl pix_abs16x16_mvi_asm
                     +        .ent pix_abs16x16_mvi_asm
                     +pix_abs16x16_mvi_asm:
                     +        .frame sp, 0, ra, 0
                     +        .prologue 0
+                    +
                     +#ifdef HAVE_GPROF
                     +        lda     AT, _mcount
                     +        jsr     AT, (AT), _mcount
                     +#endif
+                    +
                     +        and     a1, 7, t0
                     +        clr     v0
                     +        lda     a3, 16
                     +        beq     t0, $aligned
                     +        .align 4
                     +$unaligned:
                     +        /* Two rows per iteration.
                     +           Registers:
                     +           line 0:
                     +           t0:  left_u -> left lo -> left
                     +           t1:  mid
                     +           t2:  right_u -> right hi -> right
                     +           t3:  ref left
                     +           t4:  ref right
                     +           line 1:
                     +           t5:  left_u -> left lo -> left
                     +           t6:  mid
                     +           t7:  right_u -> right hi -> right
                     +           t8:  ref left
                     +           t9:  ref right
                     +           temp:
                     +           ta:  left hi
                     +           tb:  right lo
                     +           tc:  error left
                     +           td:  error right  */
+                    +
                     +        /* load line 0 */
                     +        ldq_u   t0, 0(a1)       # left_u
                     +        ldq_u   t1, 8(a1)       # mid
                     +        ldq_u   t2, 16(a1)      # right_u
                     +        ldq     t3, 0(a0)       # ref left
                     +        ldq     t4, 8(a0)       # ref right
                     +        addq    a0, a2, a0      # pix1
                     +        addq    a1, a2, a1      # pix2
                     +        /* load line 1 */
                     +        ldq_u   t5, 0(a1)       # left_u
                     +        ldq_u   t6, 8(a1)       # mid
                     +        ldq_u   t7, 16(a1)      # right_u
                     +        ldq     t8, 0(a0)       # ref left
                     +        ldq     t9, 8(a0)       # ref right
                     +        addq    a0, a2, a0      # pix1
                     +        addq    a1, a2, a1      # pix2
                     +        /* calc line 0
                     +           (a1 already points two rows ahead here; extql/extqh only
                     +           use its low three bits as the byte shift) */
                     +        extql   t0, a1, t0      # left lo
                     +        extqh   t1, a1, ta      # left hi
                     +        extql   t1, a1, tb      # right lo
                     +        or      t0, ta, t0      # left
                     +        extqh   t2, a1, t2      # right hi
                     +        perr    t3, t0, tc      # error left
                     +        or      t2, tb, t2      # right
                     +        perr    t4, t2, td      # error right
                     +        addq    v0, tc, v0      # add error left
                     +        addq    v0, td, v0      # add error right
                     +        /* calc line 1 */
                     +        extql   t5, a1, t5      # left lo
                     +        extqh   t6, a1, ta      # left hi
                     +        extql   t6, a1, tb      # right lo
                     +        or      t5, ta, t5      # left
                     +        extqh   t7, a1, t7      # right hi
                     +        perr    t8, t5, tc      # error left
                     +        or      t7, tb, t7      # right
                     +        perr    t9, t7, td      # error right
                     +        addq    v0, tc, v0      # add error left
                     +        addq    v0, td, v0      # add error right
                     +        /* loop */
                     +        subq    a3,  2, a3      # h -= 2
                     +        bne     a3, $unaligned
                     +        ret
+                    +
                     +        .align 4
                     +$aligned:
                     +        /* Four rows per iteration; all loads are plain ldq.
                     +           load line 0 */
                     +        ldq     t0, 0(a1)       # left
                     +        ldq     t1, 8(a1)       # right
                     +        addq    a1, a2, a1      # pix2
                     +        ldq     t2, 0(a0)       # ref left
                     +        ldq     t3, 8(a0)       # ref right
                     +        addq    a0, a2, a0      # pix1
                     +        /* load line 1 */
                     +        ldq     t4, 0(a1)       # left
                     +        ldq     t5, 8(a1)       # right
                     +        addq    a1, a2, a1      # pix2
                     +        ldq     t6, 0(a0)       # ref left
                     +        ldq     t7, 8(a0)       # ref right
                     +        addq    a0, a2, a0      # pix1
                     +        /* load line 2 */
                     +        ldq     t8, 0(a1)       # left
                     +        ldq     t9, 8(a1)       # right
                     +        addq    a1, a2, a1      # pix2
                     +        ldq     ta, 0(a0)       # ref left
                     +        ldq     tb, 8(a0)       # ref right
                     +        addq    a0, a2, a0      # pix1
                     +        /* load line 3 */
                     +        ldq     tc, 0(a1)       # left
                     +        ldq     td, 8(a1)       # right
                     +        addq    a1, a2, a1      # pix2
                     +        ldq     te, 0(a0)       # ref left  (te is a5)
                     +        ldq     tf, 8(a0)       # ref right (tf is a4)
                     +        /* calc line 0
                     +           (the a0 step for line 3 is interleaved with the perr ops;
                     +           t0/t1 are reused as the error temporaries from here on) */
                     +        perr    t0, t2, t0      # error left
                     +        addq    a0, a2, a0      # pix1
                     +        perr    t1, t3, t1      # error right
                     +        addq    v0, t0, v0      # add error left
                     +        /* calc line 1 */
                     +        perr    t4, t6, t0      # error left
                     +        addq    v0, t1, v0      # add error right
                     +        perr    t5, t7, t1      # error right
                     +        addq    v0, t0, v0      # add error left
                     +        /* calc line 2 */
                     +        perr    t8, ta, t0      # error left
                     +        addq    v0, t1, v0      # add error right
                     +        perr    t9, tb, t1      # error right
                     +        addq    v0, t0, v0      # add error left
                     +        /* calc line 3 */
                     +        perr    tc, te, t0      # error left
                     +        addq    v0, t1, v0      # add error right
                     +        perr    td, tf, t1      # error right
                     +        addq    v0, t0, v0      # add error left
                     +        addq    v0, t1, v0      # add error right
                     +        /* loop */
                     +        subq    a3,  4, a3      # h -= 4
                     +        bne     a3, $aligned
                     +        ret
                     +        .end pix_abs16x16_mvi_asm

...	...	@@ -117,6 +117,7 @@ int pix_abs8x8_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
117	117	return result;
118	118	}
119	119
	120	+#if 0 /* now done in assembly */
120	121	int pix_abs16x16_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
121	122	{
122	123	int result = 0;
...	...	@@ -157,6 +158,7 @@ int pix_abs16x16_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
157	157
158	158	return result;
159	159	}
	160	+#endif
160	161
161	162	int pix_abs16x16_x2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
162	163	{